mirror of https://github.com/apache/cloudstack.git
CLOUDSTACK-3535: add KVMInvestigator to investigate KVM hosts
Conflicts: plugins/hypervisors/kvm/src/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java
This commit is contained in:
parent
25fa21f190
commit
5b0314fff9
|
|
@ -481,6 +481,10 @@
|
|||
<property name="name" value="XenServerInvestigator"/>
|
||||
</bean>
|
||||
|
||||
<bean id="KVMInvestigator" class="com.cloud.ha.KVMInvestigator">
|
||||
<property name="name" value="KVMInvestigator"/>
|
||||
</bean>
|
||||
|
||||
<bean id="UserVmDomRInvestigator" class="com.cloud.ha.UserVmDomRInvestigator">
|
||||
<property name="name" value="PingInvestigator"/>
|
||||
</bean>
|
||||
|
|
|
|||
|
|
@ -173,6 +173,7 @@
|
|||
<ref bean="XenServerInvestigator"/>
|
||||
<ref bean="UserVmDomRInvestigator"/>
|
||||
<ref bean="ManagementIPSystemVMInvestigator"/>
|
||||
<ref bean="KVMInvestigator"/>
|
||||
</list>
|
||||
</property>
|
||||
</bean>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,72 @@
|
|||
/*
|
||||
* Licensed to the Apache Software Foundation (ASF) under one
|
||||
* or more contributor license agreements. See the NOTICE file
|
||||
* distributed with this work for additional information
|
||||
* regarding copyright ownership. The ASF licenses this file
|
||||
* to you under the Apache License, Version 2.0 (the
|
||||
* "License"); you may not use this file except in compliance
|
||||
* with the License. You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing,
|
||||
* software distributed under the License is distributed on an
|
||||
* "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
* KIND, either express or implied. See the License for the
|
||||
* specific language governing permissions and limitations
|
||||
* under the License.
|
||||
*/
|
||||
package com.cloud.ha;
|
||||
|
||||
import com.cloud.agent.AgentManager;
|
||||
import com.cloud.agent.api.Answer;
|
||||
import com.cloud.agent.api.CheckOnHostCommand;
|
||||
import com.cloud.host.Host;
|
||||
import com.cloud.host.HostVO;
|
||||
import com.cloud.host.Status;
|
||||
import com.cloud.host.dao.HostDao;
|
||||
import com.cloud.hypervisor.Hypervisor;
|
||||
import com.cloud.resource.ResourceManager;
|
||||
import com.cloud.utils.component.AdapterBase;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import javax.inject.Inject;
|
||||
import java.util.List;
|
||||
|
||||
public class KVMInvestigator extends AdapterBase implements Investigator {
|
||||
private final static Logger s_logger = Logger.getLogger(KVMInvestigator.class);
|
||||
@Inject
|
||||
HostDao _hostDao;
|
||||
@Inject
|
||||
AgentManager _agentMgr;
|
||||
@Inject
|
||||
ResourceManager _resourceMgr;
|
||||
@Override
|
||||
public Boolean isVmAlive(com.cloud.vm.VirtualMachine vm, Host host) {
|
||||
Status status = isAgentAlive(host);
|
||||
if (status == null) {
|
||||
return null;
|
||||
}
|
||||
return status == Status.Up ? true : null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Status isAgentAlive(Host agent) {
|
||||
if (agent.getHypervisorType() != Hypervisor.HypervisorType.KVM) {
|
||||
return null;
|
||||
}
|
||||
CheckOnHostCommand cmd = new CheckOnHostCommand(agent);
|
||||
List<HostVO> neighbors = _resourceMgr.listAllHostsInCluster(agent.getClusterId());
|
||||
for (HostVO neighbor : neighbors) {
|
||||
if (neighbor.getId() == agent.getId() || neighbor.getHypervisorType() != Hypervisor.HypervisorType.KVM) {
|
||||
continue;
|
||||
}
|
||||
Answer answer = _agentMgr.easySend(neighbor.getId(), cmd);
|
||||
|
||||
return answer.getResult() ? Status.Down : Status.Up;
|
||||
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
}
|
||||
|
|
@ -55,6 +55,7 @@ import java.util.concurrent.Future;
|
|||
import javax.ejb.Local;
|
||||
import javax.naming.ConfigurationException;
|
||||
|
||||
import com.cloud.agent.api.CheckOnHostCommand;
|
||||
import org.apache.commons.io.FileUtils;
|
||||
import org.apache.log4j.Logger;
|
||||
import org.libvirt.Connect;
|
||||
|
|
@ -1278,6 +1279,8 @@ ServerResource {
|
|||
return storageHandler.handleStorageCommands((StorageSubSystemCommand)cmd);
|
||||
} else if (cmd instanceof PvlanSetupCommand) {
|
||||
return execute((PvlanSetupCommand) cmd);
|
||||
} else if (cmd instanceof CheckOnHostCommand) {
|
||||
return execute((CheckOnHostCommand)cmd);
|
||||
} else {
|
||||
s_logger.warn("Unsupported command ");
|
||||
return Answer.createUnsupportedCommandAnswer(cmd);
|
||||
|
|
@ -1411,6 +1414,26 @@ ServerResource {
|
|||
|
||||
}
|
||||
|
||||
protected Answer execute(CheckOnHostCommand cmd) {
|
||||
ExecutorService executors = Executors.newSingleThreadExecutor();
|
||||
List<NfsStoragePool> pools = _monitor.getStoragePools();
|
||||
KVMHAChecker ha = new KVMHAChecker(pools, cmd.getHost().getPrivateNetwork().getIp());
|
||||
Future<Boolean> future = executors.submit(ha);
|
||||
try {
|
||||
Boolean result = future.get();
|
||||
if (result) {
|
||||
return new Answer(cmd, false, "Heart is still beating...");
|
||||
} else {
|
||||
return new Answer(cmd);
|
||||
}
|
||||
} catch (InterruptedException e) {
|
||||
return new Answer(cmd, false, "can't get status of host:");
|
||||
} catch (ExecutionException e) {
|
||||
return new Answer(cmd, false, "can't get status of host:");
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
protected Storage.StorageResourceType getStorageResourceType() {
|
||||
return Storage.StorageResourceType.STORAGE_POOL;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -521,19 +521,19 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
|||
ConnectionException ce = (ConnectionException)e;
|
||||
if (ce.isSetupError()) {
|
||||
s_logger.warn("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage());
|
||||
handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true);
|
||||
handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true);
|
||||
throw ce;
|
||||
} else {
|
||||
s_logger.info("Monitor " + monitor.second().getClass().getSimpleName() + " says not to continue the connect process for " + hostId + " due to " + e.getMessage());
|
||||
handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true);
|
||||
handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true, true);
|
||||
return attache;
|
||||
}
|
||||
} else if (e instanceof HypervisorVersionChangedException) {
|
||||
handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true);
|
||||
handleDisconnectWithoutInvestigation(attache, Event.ShutdownRequested, true, true);
|
||||
throw new CloudRuntimeException("Unable to connect " + attache.getId(), e);
|
||||
} else {
|
||||
s_logger.error("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + " due to " + e.getMessage(), e);
|
||||
handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true);
|
||||
handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true);
|
||||
throw new CloudRuntimeException("Unable to connect " + attache.getId(), e);
|
||||
}
|
||||
}
|
||||
|
|
@ -547,7 +547,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
|||
// this is tricky part for secondary storage
|
||||
// make it as disconnected, wait for secondary storage VM to be up
|
||||
// return the attache instead of null, even if it is disconnected
|
||||
handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true);
|
||||
handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true);
|
||||
}
|
||||
|
||||
agentStatusTransitTo(host, Event.Ready, _nodeId);
|
||||
|
|
@ -744,7 +744,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
|||
return true;
|
||||
}
|
||||
|
||||
protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event, boolean transitState) {
|
||||
protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event, boolean transitState, boolean removeAgent) {
|
||||
long hostId = attache.getId();
|
||||
|
||||
s_logger.info("Host " + hostId + " is disconnecting with event " + event);
|
||||
|
|
@ -779,8 +779,10 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
|||
s_logger.debug("Deregistering link for " + hostId + " with state " + nextStatus);
|
||||
}
|
||||
|
||||
//remove the attache
|
||||
removeAgent(attache, nextStatus);
|
||||
//For KVM, if removeagent is false, don't remove the agent in agentmap, pingtimeout will pick it up.
|
||||
if (host.getHypervisorType() != HypervisorType.KVM || removeAgent) {
|
||||
removeAgent(attache, nextStatus);
|
||||
}
|
||||
|
||||
//update the DB
|
||||
if (host != null && transitState) {
|
||||
|
|
@ -859,7 +861,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
|||
}
|
||||
}
|
||||
|
||||
handleDisconnectWithoutInvestigation(attache, event, true);
|
||||
handleDisconnectWithoutInvestigation(attache, event, true, true);
|
||||
host = _hostDao.findById(host.getId());
|
||||
if (host.getStatus() == Status.Alert || host.getStatus() == Status.Down) {
|
||||
_haMgr.scheduleRestartForVmsOnHost(host, true);
|
||||
|
|
@ -885,7 +887,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
|||
if (_investigate == true) {
|
||||
handleDisconnectWithInvestigation(_attache, _event);
|
||||
} else {
|
||||
handleDisconnectWithoutInvestigation(_attache, _event, true);
|
||||
handleDisconnectWithoutInvestigation(_attache, _event, true, false);
|
||||
}
|
||||
} catch (final Exception e) {
|
||||
s_logger.error("Exception caught while handling disconnect: ", e);
|
||||
|
|
@ -979,7 +981,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
|||
AgentAttache attache = null;
|
||||
attache = findAttache(hostId);
|
||||
if (attache != null) {
|
||||
handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true);
|
||||
handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, true, true);
|
||||
}
|
||||
return true;
|
||||
} else if (event == Event.ShutdownRequested) {
|
||||
|
|
|
|||
|
|
@ -310,19 +310,19 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
}
|
||||
|
||||
@Override
|
||||
protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event, boolean transitState) {
|
||||
return handleDisconnect(attache, event, false, true);
|
||||
protected boolean handleDisconnectWithoutInvestigation(AgentAttache attache, Status.Event event, boolean transitState, boolean removeAgent) {
|
||||
return handleDisconnect(attache, event, false, true, removeAgent);
|
||||
}
|
||||
|
||||
@Override
|
||||
protected boolean handleDisconnectWithInvestigation(AgentAttache attache, Status.Event event) {
|
||||
return handleDisconnect(attache, event, true, true);
|
||||
return handleDisconnect(attache, event, true, true, true);
|
||||
}
|
||||
|
||||
protected boolean handleDisconnect(AgentAttache agent, Status.Event event, boolean investigate, boolean broadcast) {
|
||||
protected boolean handleDisconnect(AgentAttache agent, Status.Event event, boolean investigate, boolean broadcast, boolean removeAgent) {
|
||||
boolean res;
|
||||
if (!investigate) {
|
||||
res = super.handleDisconnectWithoutInvestigation(agent, event, true);
|
||||
res = super.handleDisconnectWithoutInvestigation(agent, event, true, removeAgent);
|
||||
} else {
|
||||
res = super.handleDisconnectWithInvestigation(agent, event);
|
||||
}
|
||||
|
|
@ -365,7 +365,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
return true;
|
||||
}
|
||||
|
||||
return super.handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, false);
|
||||
return super.handleDisconnectWithoutInvestigation(attache, Event.AgentDisconnected, false, true);
|
||||
}
|
||||
|
||||
return true;
|
||||
|
|
@ -1058,7 +1058,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
|
||||
AgentAttache attache = findAttache(hostId);
|
||||
if (attache != null) {
|
||||
result = handleDisconnect(attache, Event.AgentDisconnected, false, false);
|
||||
result = handleDisconnect(attache, Event.AgentDisconnected, false, false, true);
|
||||
}
|
||||
|
||||
if (result) {
|
||||
|
|
@ -1134,7 +1134,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
try {
|
||||
s_logger.debug("Management server " + _nodeId + " failed to rebalance agent " + hostId);
|
||||
_hostTransferDao.completeAgentTransfer(hostId);
|
||||
handleDisconnectWithoutInvestigation(findAttache(hostId), Event.RebalanceFailed, true);
|
||||
handleDisconnectWithoutInvestigation(findAttache(hostId), Event.RebalanceFailed, true, true);
|
||||
} catch (Exception ex) {
|
||||
s_logger.warn("Failed to reconnect host id=" + hostId + " as a part of failed rebalance task cleanup");
|
||||
}
|
||||
|
|
@ -1151,7 +1151,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
synchronized (_agents) {
|
||||
ClusteredDirectAgentAttache attache = (ClusteredDirectAgentAttache)_agents.get(hostId);
|
||||
if (attache != null && attache.getQueueSize() == 0 && attache.getNonRecurringListenersSize() == 0) {
|
||||
handleDisconnectWithoutInvestigation(attache, Event.StartAgentRebalance, true);
|
||||
handleDisconnectWithoutInvestigation(attache, Event.StartAgentRebalance, true, true);
|
||||
ClusteredAgentAttache forwardAttache = (ClusteredAgentAttache)createAttache(hostId);
|
||||
if (forwardAttache == null) {
|
||||
s_logger.warn("Unable to create a forward attache for the host " + hostId + " as a part of rebalance process");
|
||||
|
|
|
|||
Loading…
Reference in New Issue