From d55d45348c978294d0ba28de146160769d87ba81 Mon Sep 17 00:00:00 2001 From: Rohit Yadav Date: Mon, 1 Jun 2015 14:53:58 +0200 Subject: [PATCH] CLOUDSTACK-8530: KVM hosts without active agents should be in Disconnected state KVM hosts which are actually up but whose agents are shut down should be put in the Disconnected state. This avoids getting their VMs HA'd, and other commands such as deploying a VM will exclude that host, saving us from errors. The improvement is that we first try to contact the KVM host itself. If that fails we assume that it's disconnected, and then ask its KVM neighbours if they can check its status. If all of the KVM neighbours tell us that it's Down and we're unable to reach the KVM host, then the host is possibly down. If any of the KVM neighbours tells us that it's Up but we're unable to reach the KVM host, then we can be sure that the agent is offline but the host is running. Signed-off-by: Rohit Yadav This closes #340 --- .../kvm/src/com/cloud/ha/KVMInvestigator.java | 43 ++++++++++++++----- 1 file changed, 33 insertions(+), 10 deletions(-) diff --git a/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java b/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java index e750ced7e11..b816d09dbce 100644 --- a/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java +++ b/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java @@ -18,13 +18,6 @@ */ package com.cloud.ha; -import java.util.List; - -import javax.ejb.Local; -import javax.inject.Inject; - -import org.apache.log4j.Logger; - import com.cloud.agent.AgentManager; import com.cloud.agent.api.Answer; import com.cloud.agent.api.CheckOnHostCommand; @@ -35,6 +28,11 @@ import com.cloud.host.dao.HostDao; import com.cloud.hypervisor.Hypervisor; import com.cloud.resource.ResourceManager; import com.cloud.utils.component.AdapterBase; +import org.apache.log4j.Logger; + +import javax.ejb.Local; +import javax.inject.Inject; +import java.util.List; @Local(value = 
Investigator.class) public class KVMInvestigator extends AdapterBase implements Investigator { @@ -60,22 +58,47 @@ public class KVMInvestigator extends AdapterBase implements Investigator { if (agent.getHypervisorType() != Hypervisor.HypervisorType.KVM && agent.getHypervisorType() != Hypervisor.HypervisorType.LXC) { return null; } + Status hostStatus = null; + Status neighbourStatus = null; CheckOnHostCommand cmd = new CheckOnHostCommand(agent); + + try { + Answer answer = _agentMgr.easySend(agent.getId(), cmd); + if (answer != null) { + hostStatus = answer.getResult() ? Status.Down : Status.Up; + } + } catch (Exception e) { + s_logger.debug("Failed to send command to host: " + agent.getId()); + } + if (hostStatus == null) { + hostStatus = Status.Disconnected; + } + List neighbors = _resourceMgr.listHostsInClusterByStatus(agent.getClusterId(), Status.Up); for (HostVO neighbor : neighbors) { if (neighbor.getId() == agent.getId() || (neighbor.getHypervisorType() != Hypervisor.HypervisorType.KVM && neighbor.getHypervisorType() != Hypervisor.HypervisorType.LXC)) { continue; } + s_logger.debug("Investigating host:" + agent.getId() + " via neighbouring host:" + neighbor.getId()); try { Answer answer = _agentMgr.easySend(neighbor.getId(), cmd); if (answer != null) { - return answer.getResult() ? Status.Down : Status.Up; + neighbourStatus = answer.getResult() ? 
Status.Down : Status.Up; + s_logger.debug("Neighbouring host:" + neighbor.getId() + " returned status:" + neighbourStatus + " for the investigated host:" + agent.getId()); + if (neighbourStatus == Status.Up) { + break; + } } } catch (Exception e) { s_logger.debug("Failed to send command to host: " + neighbor.getId()); } } - - return null; + if (neighbourStatus == Status.Up && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) { + hostStatus = Status.Disconnected; + } + if (neighbourStatus == Status.Down && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) { + hostStatus = Status.Down; + } + return hostStatus; } }