diff --git a/client/tomcatconf/applicationContext.xml.in b/client/tomcatconf/applicationContext.xml.in
index 0f551344b7d..b832b09c9c2 100644
--- a/client/tomcatconf/applicationContext.xml.in
+++ b/client/tomcatconf/applicationContext.xml.in
@@ -481,6 +481,10 @@
+
+
+
+
diff --git a/client/tomcatconf/componentContext.xml.in b/client/tomcatconf/componentContext.xml.in
index 1fbec61ba66..5ca07502a22 100644
--- a/client/tomcatconf/componentContext.xml.in
+++ b/client/tomcatconf/componentContext.xml.in
@@ -173,6 +173,7 @@
+
diff --git a/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java b/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java
new file mode 100644
index 00000000000..65024f8444f
--- /dev/null
+++ b/plugins/hypervisors/kvm/src/com/cloud/ha/KVMInvestigator.java
@@ -0,0 +1,72 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package com.cloud.ha;
+
+import com.cloud.agent.AgentManager;
+import com.cloud.agent.api.Answer;
+import com.cloud.agent.api.CheckOnHostCommand;
+import com.cloud.host.Host;
+import com.cloud.host.HostVO;
+import com.cloud.host.Status;
+import com.cloud.host.dao.HostDao;
+import com.cloud.hypervisor.Hypervisor;
+import com.cloud.resource.ResourceManager;
+import com.cloud.utils.component.AdapterBase;
+import org.apache.log4j.Logger;
+
+import javax.inject.Inject;
+import java.util.List;
+
+public class KVMInvestigator extends AdapterBase implements Investigator {
+ private final static Logger s_logger = Logger.getLogger(KVMInvestigator.class);
+ @Inject
+ HostDao _hostDao;
+ @Inject
+ AgentManager _agentMgr;
+ @Inject
+ ResourceManager _resourceMgr;
+ @Override
+ public Boolean isVmAlive(com.cloud.vm.VirtualMachine vm, Host host) {
+ Status status = isAgentAlive(host);
+ if (status == null) {
+ return null;
+ }
+ return status == Status.Up ? true : null;
+ }
+
+ @Override
+ public Status isAgentAlive(Host agent) {
+ if (agent.getHypervisorType() != Hypervisor.HypervisorType.KVM) {
+ return null;
+ }
+ CheckOnHostCommand cmd = new CheckOnHostCommand(agent);
+ List neighbors = _resourceMgr.listAllHostsInCluster(agent.getClusterId());
+ for (HostVO neighbor : neighbors) {
+ if (neighbor.getId() == agent.getId() || neighbor.getHypervisorType() != Hypervisor.HypervisorType.KVM) {
+ continue;
+ }
+ Answer answer = _agentMgr.easySend(neighbor.getId(), cmd);
+
+ return answer.getResult() ? Status.Down : Status.Up;
+
+ }
+
+ return null;
+ }
+}
diff --git a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
index 0aba15263bf..542136adf6f 100755
--- a/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
+++ b/plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
@@ -55,6 +55,7 @@ import java.util.concurrent.Future;
import javax.ejb.Local;
import javax.naming.ConfigurationException;
+import com.cloud.agent.api.CheckOnHostCommand;
import org.apache.commons.io.FileUtils;
import org.apache.log4j.Logger;
import org.libvirt.Connect;
@@ -1278,6 +1279,8 @@ ServerResource {
return storageHandler.handleStorageCommands((StorageSubSystemCommand)cmd);
} else if (cmd instanceof PvlanSetupCommand) {
return execute((PvlanSetupCommand) cmd);
+ } else if (cmd instanceof CheckOnHostCommand) {
+ return execute((CheckOnHostCommand)cmd);
} else {
s_logger.warn("Unsupported command ");
return Answer.createUnsupportedCommandAnswer(cmd);
@@ -1411,6 +1414,26 @@ ServerResource {
}
+ protected Answer execute(CheckOnHostCommand cmd) {
+ ExecutorService executors = Executors.newSingleThreadExecutor();
+ List pools = _monitor.getStoragePools();
+ KVMHAChecker ha = new KVMHAChecker(pools, cmd.getHost().getPrivateNetwork().getIp());
+ Future future = executors.submit(ha);
+ try {
+ Boolean result = future.get();
+ if (result) {
+ return new Answer(cmd, false, "Heart is still beating...");
+ } else {
+ return new Answer(cmd);
+ }
+ } catch (InterruptedException e) {
+ return new Answer(cmd, false, "can't get status of host:");
+ } catch (ExecutionException e) {
+ return new Answer(cmd, false, "can't get status of host:");
+ }
+
+ }
+
protected Storage.StorageResourceType getStorageResourceType() {
return Storage.StorageResourceType.STORAGE_POOL;
}
diff --git a/server/src/com/cloud/agent/manager/AgentManagerImpl.java b/server/src/com/cloud/agent/manager/AgentManagerImpl.java
index b1b862d3674..cf59b31273b 100755
--- a/server/src/com/cloud/agent/manager/AgentManagerImpl.java
+++ b/server/src/com/cloud/agent/manager/AgentManagerImpl.java
@@ -521,19 +521,19 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
ConnectionException ce = (ConnectionException)e;
if (ce.isSetupError()) {
s_logger.warn("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage());
- handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true);
+ handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true);
throw ce;
} else {
s_logger.info("Monitor " + monitor.second().getClass().getSimpleName() + " says not to continue the connect process for " + hostId + " due to " + e.getMessage());
- handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true);
+ handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true, true);
return attache;
}
} else if (e instanceof HypervisorVersionChangedException) {
- handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true);
+ handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true, true);
throw new CloudRuntimeException("Unable to connect " + attache.getId(), e);
} else {
s_logger.error("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage(), e);
- handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true);
+ handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true);
throw new CloudRuntimeException("Unable to connect " + attache.getId(), e);
}
}
@@ -547,7 +547,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
// this is tricky part for secondary storage
// make it as disconnected, wait for secondary storage VM to be up
// return the attache instead of null, even it is disconnectede
- handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true);
+ handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true);
}
agentStatusTransitTo(host, Event.Ready, _nodeId);
@@ -744,7 +744,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
return true;
}
- protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event, boolean transitState) {
+ protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event, boolean transitState, boolean removeAgent) {
long hostId = attache.getId();
s_logger.info("Host " + hostId + " is disconnecting with event " + event);
@@ -779,8 +779,10 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
s_logger.debug("Deregistering link for " + hostId + " with state " + nextStatus);
}
- //remove the attache
- removeAgent(attache, nextStatus);
+ //For KVM, if removeagent is false, don't remove the agent in agentmap, pingtimeout will pick it up.
+ if (host.getHypervisorType() != HypervisorType.KVM || removeAgent) {
+ removeAgent(attache, nextStatus);
+ }
//update the DB
if (host != null && transitState) {
@@ -859,7 +861,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
}
}
- handleDisconnectWithoutInvestigation(attache, event, true);
+ handleDisconnectWithoutInvestigation(attache, event, true, true);
host = _hostDao.findById(host.getId());
if (host.getStatus() == Status.Alert || host.getStatus() == Status.Down) {
_haMgr.scheduleRestartForVmsOnHost(host, true);
@@ -885,7 +887,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
if (_investigate == true) {
handleDisconnectWithInvestigation(_attache, _event);
} else {
- handleDisconnectWithoutInvestigation(_attache, _event, true);
+ handleDisconnectWithoutInvestigation(_attache, _event, true, false);
}
} catch (final Exception e) {
s_logger.error("Exception caught while handling disconnect: ", e);
@@ -979,7 +981,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
AgentAttache attache = null;
attache = findAttache(hostId);
if (attache != null) {
- handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true);
+ handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true);
}
return true;
} else if (event == Event.ShutdownRequested) {
diff --git a/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java b/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java
index bc72aff054d..0c3d6e1b9a8 100755
--- a/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java
+++ b/server/src/com/cloud/agent/manager/ClusteredAgentManagerImpl.java
@@ -310,19 +310,19 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
}
@Override
- protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event, boolean transitState) {
- return handleDisconnect(attache, event, false, true);
+ protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event, boolean transitState, boolean removeAgent) {
+ return handleDisconnect(attache, event, false, true, removeAgent);
}
@Override
protected boolean handleDisconnectWithInvestigation(AgentAttache attache, Status.Event event) {
- return handleDisconnect(attache, event, true, true);
+ return handleDisconnect(attache, event, true, true, true);
}
- protected boolean handleDisconnect(AgentAttache agent, Status.Event event, boolean investigate, boolean broadcast) {
+ protected boolean handleDisconnect(AgentAttache agent, Status.Event event, boolean investigate, boolean broadcast, boolean removeAgent) {
boolean res;
if (!investigate) {
- res = super.handleDisconnectWithoutInvestigation(agent, event, true);
+ res = super.handleDisconnectWithoutInvestigation(agent, event, true, removeAgent);
} else {
res = super.handleDisconnectWithInvestigation(agent, event);
}
@@ -365,7 +365,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
return true;
}
- return super.handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, false);
+ return super.handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, false, true);
}
return true;
@@ -1058,7 +1058,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
AgentAttache attache = findAttache(hostId);
if (attache != null) {
- result = handleDisconnect(attache, Event.AgentDisconnected, false, false);
+ result = handleDisconnect(attache, Event.AgentDisconnected, false, false, true);
}
if (result) {
@@ -1134,7 +1134,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
try {
s_logger.debug("Management server " + _nodeId + " failed to rebalance agent " + hostId);
_hostTransferDao.completeAgentTransfer(hostId);
- handleDisconnectWithoutInvestigation(findAttache(hostId), Event.RebalanceFailed, true);
+ handleDisconnectWithoutInvestigation(findAttache(hostId), Event.RebalanceFailed, true, true);
} catch (Exception ex) {
s_logger.warn("Failed to reconnect host id=" + hostId + " as a part of failed rebalance task cleanup");
}
@@ -1151,7 +1151,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
synchronized (_agents) {
ClusteredDirectAgentAttache attache = (ClusteredDirectAgentAttache)_agents.get(hostId);
if (attache != null && attache.getQueueSize() == 0 && attache.getNonRecurringListenersSize() == 0) {
- handleDisconnectWithoutInvestigation(attache, Event.StartAgentRebalance, true);
+ handleDisconnectWithoutInvestigation(attache, Event.StartAgentRebalance, true, true);
ClusteredAgentAttache forwardAttache = (ClusteredAgentAttache)createAttache(hostId);
if (forwardAttache == null) {
s_logger.warn("Unable to create a forward attache for the host " + hostId + " as a part of rebalance process");