mirror of https://github.com/apache/cloudstack.git
bug 8558: improve cluster management
This commit is contained in:
parent
b6ae35728e
commit
6203ba6bfe
|
|
@ -81,7 +81,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
@Override
|
||||
public boolean configure(String name, Map<String, Object> params) throws ConfigurationException {
|
||||
_peers = new HashMap<String, SocketChannel>(7);
|
||||
_nodeId = _clusterMgr.getId();
|
||||
_nodeId = _clusterMgr.getManagementNodeId();
|
||||
|
||||
ClusteredAgentAttache.initialize(this);
|
||||
|
||||
|
|
@ -120,7 +120,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
|||
|
||||
// for agents that are self-managed, threshold to be considered as disconnected is 3 ping intervals
|
||||
long cutSeconds = (System.currentTimeMillis() >> 10) - (_pingInterval*3);
|
||||
List<HostVO> hosts = _hostDao.findDirectAgentToLoad(_clusterMgr.getId(), cutSeconds, LOAD_SIZE);
|
||||
List<HostVO> hosts = _hostDao.findDirectAgentToLoad(_clusterMgr.getManagementNodeId(), cutSeconds, LOAD_SIZE);
|
||||
if ( hosts != null && hosts.size() == LOAD_SIZE ) {
|
||||
Long clusterId = hosts.get((int)(LOAD_SIZE-1)).getClusterId();
|
||||
if ( clusterId != null) {
|
||||
|
|
|
|||
|
|
@ -592,7 +592,7 @@ public class AsyncJobManagerImpl implements AsyncJobManager, ClusterManagerListe
|
|||
|
||||
private long getMsid() {
|
||||
if(_clusterMgr != null)
|
||||
return _clusterMgr.getId();
|
||||
return _clusterMgr.getManagementNodeId();
|
||||
|
||||
return MacAddress.getMacAddress().toLong();
|
||||
}
|
||||
|
|
|
|||
|
|
@ -41,10 +41,14 @@ public interface ClusterManager extends Manager {
|
|||
public boolean executeAgentUserRequest(long agentId, Event event) throws AgentUnavailableException;
|
||||
public Boolean propagateAgentEvent(long agentId, Event event) throws AgentUnavailableException;
|
||||
|
||||
public int getHeartbeatThreshold();
|
||||
public long getId();
|
||||
public long getCurrentRunId();
|
||||
public boolean isManageemnNodeAlive(long msid);
|
||||
public int getHeartbeatThreshold();
|
||||
|
||||
public long getManagementNodeId();
|
||||
public long getCurrentRunId();
|
||||
|
||||
public boolean isManagementNodeAlive(long msid);
|
||||
public boolean pingManagementNode(long msid);
|
||||
|
||||
public String getSelfPeerName();
|
||||
public String getSelfNodeIP();
|
||||
public String getPeerName(long agentHostId);
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ import java.util.ArrayList;
|
|||
import java.util.Date;
|
||||
import java.util.Enumeration;
|
||||
import java.util.HashMap;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
|
@ -39,6 +40,7 @@ import com.cloud.host.dao.HostDao;
|
|||
import com.cloud.serializer.GsonHelper;
|
||||
import com.cloud.utils.DateUtil;
|
||||
import com.cloud.utils.NumbersUtil;
|
||||
import com.cloud.utils.Profiler;
|
||||
import com.cloud.utils.PropertiesUtil;
|
||||
import com.cloud.utils.component.Adapters;
|
||||
import com.cloud.utils.component.ComponentLocator;
|
||||
|
|
@ -85,6 +87,7 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
protected long _runId = System.currentTimeMillis();
|
||||
|
||||
private Long _mshostId = null;
|
||||
private boolean _peerScanInited = false;
|
||||
|
||||
private String _name;
|
||||
private String _clusterNodeIP = "127.0.0.1";
|
||||
|
|
@ -537,6 +540,11 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
s_logger.trace("Cluster manager peer-scan, id:" + _mshostId);
|
||||
}
|
||||
|
||||
if(!_peerScanInited) {
|
||||
_peerScanInited = true;
|
||||
initPeerScan();
|
||||
}
|
||||
|
||||
peerScan();
|
||||
} catch (Throwable e) {
|
||||
s_logger.error("Problem with the cluster peer-scan!", e);
|
||||
|
|
@ -545,6 +553,15 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
};
|
||||
}
|
||||
|
||||
private void initPeerScan() {
|
||||
// upon startup, for all inactive management server nodes that we see at startup time, we will send notification also to help upper layer perform
|
||||
// missed cleanup
|
||||
Date cutTime = DateUtil.currentGMTTime();
|
||||
List<ManagementServerHostVO> inactiveList = _mshostDao.getInactiveList(new Date(cutTime.getTime() - heartbeatThreshold));
|
||||
if(inactiveList.size() > 0)
|
||||
notifyNodeLeft(inactiveList);
|
||||
}
|
||||
|
||||
private void peerScan() {
|
||||
Date cutTime = DateUtil.currentGMTTime();
|
||||
|
||||
|
|
@ -564,14 +581,22 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
}
|
||||
}
|
||||
}
|
||||
|
||||
for(ManagementServerHostVO mshost : removedNodeList) {
|
||||
activePeers.remove(mshost.getId());
|
||||
|
||||
try {
|
||||
JmxUtil.unregisterMBean("ClusterManager", "Node " + mshost.getId());
|
||||
} catch(Exception e) {
|
||||
s_logger.warn("Unable to deregiester cluster node from JMX monitoring due to exception " + e.toString());
|
||||
|
||||
Iterator<ManagementServerHostVO> it = removedNodeList.iterator();
|
||||
while(it.hasNext()) {
|
||||
ManagementServerHostVO mshost = it.next();
|
||||
if(!pingManagementNode(mshost.getId())) {
|
||||
s_logger.warn("Management node " + mshost.getId() + " is detected inactive by timestamp and also not pingable");
|
||||
activePeers.remove(mshost.getId());
|
||||
|
||||
try {
|
||||
JmxUtil.unregisterMBean("ClusterManager", "Node " + mshost.getId());
|
||||
} catch(Exception e) {
|
||||
s_logger.warn("Unable to deregiester cluster node from JMX monitoring due to exception " + e.toString());
|
||||
}
|
||||
} else {
|
||||
s_logger.info("Management node " + mshost.getId() + " is detected inactive by timestamp but is pingable");
|
||||
it.remove();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -594,11 +619,33 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
}
|
||||
|
||||
if(newNodeList.size() > 0) {
|
||||
Profiler profiler = new Profiler();
|
||||
profiler.start();
|
||||
|
||||
notifyNodeJoined(newNodeList);
|
||||
}
|
||||
|
||||
profiler.stop();
|
||||
if(profiler.getDuration() > 1000) {
|
||||
if(s_logger.isDebugEnabled())
|
||||
s_logger.debug("Notifying management server join event took " + profiler.getDuration() + " ms");
|
||||
} else {
|
||||
s_logger.warn("Notifying management server join event took " + profiler.getDuration() + " ms");
|
||||
}
|
||||
}
|
||||
|
||||
if(removedNodeList.size() > 0) {
|
||||
notifyNodeLeft(removedNodeList);
|
||||
Profiler profiler = new Profiler();
|
||||
profiler.start();
|
||||
|
||||
notifyNodeLeft(removedNodeList);
|
||||
|
||||
profiler.stop();
|
||||
if(profiler.getDuration() > 1000) {
|
||||
if(s_logger.isDebugEnabled())
|
||||
s_logger.debug("Notifying management server leave event took " + profiler.getDuration() + " ms");
|
||||
} else {
|
||||
s_logger.warn("Notifying management server leave event took " + profiler.getDuration() + " ms");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -678,6 +725,7 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
if(s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Cluster manager is started");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -708,6 +756,7 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
if(s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Start configuring cluster manager : " + name);
|
||||
}
|
||||
_name = name;
|
||||
|
||||
ComponentLocator locator = ComponentLocator.getCurrentLocator();
|
||||
_agentMgr = locator.getManager(AgentManager.class);
|
||||
|
|
@ -741,8 +790,6 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
if(value != null) {
|
||||
heartbeatThreshold = NumbersUtil.parseInt(value, ClusterManager.DEFAULT_HEARTBEAT_THRESHOLD);
|
||||
}
|
||||
|
||||
_name = name;
|
||||
|
||||
File dbPropsFile = PropertiesUtil.findConfigFile("db.properties");
|
||||
Properties dbProps = new Properties();
|
||||
|
|
@ -757,6 +804,8 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
if(_clusterNodeIP == null) {
|
||||
_clusterNodeIP = "127.0.0.1";
|
||||
}
|
||||
_clusterNodeIP = _clusterNodeIP.trim();
|
||||
|
||||
if(s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Cluster node IP : " + _clusterNodeIP);
|
||||
}
|
||||
|
|
@ -778,6 +827,8 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
throw new ConfigurationException("Unable to set current cluster service adapter");
|
||||
}
|
||||
|
||||
checkConflicts();
|
||||
|
||||
if(s_logger.isInfoEnabled()) {
|
||||
s_logger.info("Cluster manager is configured.");
|
||||
}
|
||||
|
|
@ -785,7 +836,7 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
}
|
||||
|
||||
@Override
|
||||
public long getId() {
|
||||
public long getManagementNodeId() {
|
||||
return _id;
|
||||
}
|
||||
|
||||
|
|
@ -795,7 +846,7 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
}
|
||||
|
||||
@Override
|
||||
public boolean isManageemnNodeAlive(long msid) {
|
||||
public boolean isManagementNodeAlive(long msid) {
|
||||
ManagementServerHostVO mshost = _mshostDao.findById(msid);
|
||||
if(mshost != null) {
|
||||
if(mshost.getLastUpdateTime().getTime() >= DateUtil.currentGMTTime().getTime() - heartbeatThreshold)
|
||||
|
|
@ -805,6 +856,42 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean pingManagementNode(long msid) {
|
||||
ManagementServerHostVO mshost = _mshostDao.findById(msid);
|
||||
if(mshost == null)
|
||||
return false;
|
||||
|
||||
String targetIp = mshost.getServiceIP();
|
||||
if("127.0.0.1".equals(targetIp) || "0.0.0.0".equals(targetIp)) {
|
||||
s_logger.info("ping management node cluster service can not be performed on self");
|
||||
return false;
|
||||
}
|
||||
|
||||
String targetPeer = String.valueOf(msid);
|
||||
ClusterService peerService = null;
|
||||
for(int i = 0; i < 2; i++) {
|
||||
try {
|
||||
peerService = getPeerService(targetPeer);
|
||||
} catch (RemoteException e) {
|
||||
s_logger.error("cluster service for peer " + targetPeer + " no longer exists");
|
||||
}
|
||||
|
||||
if(peerService != null) {
|
||||
try {
|
||||
return peerService.ping(getSelfPeerName());
|
||||
} catch (RemoteException e) {
|
||||
s_logger.warn("Exception in performing remote call, ", e);
|
||||
invalidatePeerService(targetPeer);
|
||||
}
|
||||
} else {
|
||||
s_logger.warn("Remote peer " + msid + " no longer exists");
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getHeartbeatThreshold() {
|
||||
return this.heartbeatThreshold;
|
||||
|
|
@ -817,4 +904,25 @@ public class ClusterManagerImpl implements ClusterManager {
|
|||
public void setHeartbeatThreshold(int threshold) {
|
||||
heartbeatThreshold = threshold;
|
||||
}
|
||||
|
||||
private void checkConflicts() throws ConfigurationException {
|
||||
Date cutTime = DateUtil.currentGMTTime();
|
||||
List<ManagementServerHostVO> peers = _mshostDao.getActiveList(new Date(cutTime.getTime() - heartbeatThreshold));
|
||||
for(ManagementServerHostVO peer : peers) {
|
||||
if(_mshostId == peer.getMsid()) {
|
||||
// skip check on itself
|
||||
continue;
|
||||
}
|
||||
|
||||
if(_clusterNodeIP.equals(peer.getServiceIP().trim())) {
|
||||
if("127.0.0.1".equals(_clusterNodeIP)) {
|
||||
throw new ConfigurationException("Detected another management node with localhost IP is already running, please check your cluster configuration");
|
||||
} else {
|
||||
if(!pingManagementNode(peer.getId())) {
|
||||
throw new ConfigurationException("Detected that another management node with the same IP " + peer.getServiceIP() + " is already running");
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,11 +18,12 @@
|
|||
|
||||
package com.cloud.cluster;
|
||||
|
||||
import java.rmi.Remote;
|
||||
import java.rmi.RemoteException;
|
||||
import java.rmi.Remote;
|
||||
import java.rmi.RemoteException;
|
||||
|
||||
public interface ClusterService extends Remote {
|
||||
String execute(String callingPeer, long agentId, String gsonPackage, boolean stopOnError) throws RemoteException;
|
||||
long executeAsync(String callingPeer, long agentId, String gsonPackage, boolean stopOnError) throws RemoteException;
|
||||
boolean onAsyncResult(String executingPeer, long agentId, long seq, String gsonPackage) throws RemoteException;
|
||||
boolean onAsyncResult(String executingPeer, long agentId, long seq, String gsonPackage) throws RemoteException;
|
||||
boolean ping(String callingPeer) throws RemoteException;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,136 +0,0 @@
|
|||
package com.cloud.cluster;
|
||||
|
||||
import java.io.File;
|
||||
import java.io.FileInputStream;
|
||||
import java.io.FileNotFoundException;
|
||||
import java.io.IOException;
|
||||
import java.net.MalformedURLException;
|
||||
import java.rmi.Naming;
|
||||
import java.rmi.NotBoundException;
|
||||
import java.rmi.RemoteException;
|
||||
import java.rmi.registry.LocateRegistry;
|
||||
import java.rmi.registry.Registry;
|
||||
import java.rmi.server.UnicastRemoteObject;
|
||||
import java.util.Map;
|
||||
import java.util.Properties;
|
||||
|
||||
import javax.ejb.Local;
|
||||
import javax.naming.ConfigurationException;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import com.cloud.cluster.ClusterManager;
|
||||
import com.cloud.cluster.ClusterService;
|
||||
import com.cloud.cluster.ManagementServerHostVO;
|
||||
import com.cloud.cluster.dao.ManagementServerHostDao;
|
||||
import com.cloud.utils.NumbersUtil;
|
||||
import com.cloud.utils.PropertiesUtil;
|
||||
import com.cloud.utils.component.ComponentLocator;
|
||||
|
||||
@Local(value={ClusterServiceAdapter.class})
|
||||
public class ClusterServiceRMIAdapter implements ClusterServiceAdapter {
|
||||
private static final Logger s_logger = Logger.getLogger(ClusterServiceRMIAdapter.class);
|
||||
private static final int DEFAULT_SERVICE_PORT = 1099;
|
||||
|
||||
private ClusterManager manager;
|
||||
|
||||
private ManagementServerHostDao _mshostDao;
|
||||
|
||||
private String _name;
|
||||
private int _clusterServicePort = DEFAULT_SERVICE_PORT;
|
||||
|
||||
@Override
|
||||
public ClusterService getPeerService(String strPeer) throws RemoteException {
|
||||
try {
|
||||
return (ClusterService)Naming.lookup(getServiceEndpointName(strPeer));
|
||||
} catch (MalformedURLException e) {
|
||||
s_logger.error("Malformed URL in cluster peer name");
|
||||
} catch (NotBoundException e) {
|
||||
s_logger.error("Unbound RMI exception");
|
||||
}
|
||||
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getServiceEndpointName(String strPeer) {
|
||||
long msid = Long.parseLong(strPeer);
|
||||
|
||||
ManagementServerHostVO mshost = _mshostDao.findByMsid(msid);
|
||||
if(mshost == null)
|
||||
return null;
|
||||
|
||||
return composeEndpointName(mshost.getServiceIP(), mshost.getServicePort());
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getServicePort() {
|
||||
return _clusterServicePort;
|
||||
}
|
||||
|
||||
private String composeEndpointName(String nodeIP, int port) {
|
||||
StringBuffer sb = new StringBuffer();
|
||||
sb.append("//").append(nodeIP).append(":").append(port).append("/clusterservice");
|
||||
return sb.toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean configure(String name, Map<String, Object> params) throws ConfigurationException {
|
||||
_name = name;
|
||||
|
||||
ComponentLocator locator = ComponentLocator.getCurrentLocator();
|
||||
manager = locator.getManager(ClusterManager.class);
|
||||
if(manager == null)
|
||||
throw new ConfigurationException("Unable to get " + ClusterManager.class.getName());
|
||||
|
||||
_mshostDao = locator.getDao(ManagementServerHostDao.class);
|
||||
if(_mshostDao == null)
|
||||
throw new ConfigurationException("Unable to get " + ManagementServerHostDao.class.getName());
|
||||
|
||||
|
||||
File dbPropsFile = PropertiesUtil.findConfigFile("db.properties");
|
||||
Properties dbProps = new Properties();
|
||||
try {
|
||||
dbProps.load(new FileInputStream(dbPropsFile));
|
||||
} catch (FileNotFoundException e) {
|
||||
throw new ConfigurationException("Unable to find db.properties");
|
||||
} catch (IOException e) {
|
||||
throw new ConfigurationException("Unable to load db.properties content");
|
||||
}
|
||||
|
||||
_clusterServicePort = NumbersUtil.parseInt(dbProps.getProperty("cluster.rmi.port"), DEFAULT_SERVICE_PORT);
|
||||
if(s_logger.isInfoEnabled())
|
||||
s_logger.info("Cluster RMI port : " + _clusterServicePort);
|
||||
|
||||
// configuration and initialization
|
||||
if (System.getSecurityManager() == null) {
|
||||
System.setSecurityManager(new SecurityManager());
|
||||
}
|
||||
|
||||
try {
|
||||
ClusterService service = (ClusterService) UnicastRemoteObject.exportObject(
|
||||
new ClusterServiceRMIImpl(manager), 0);
|
||||
Registry registry = LocateRegistry.getRegistry(_clusterServicePort);
|
||||
registry.rebind(composeEndpointName(manager.getSelfNodeIP(), getServicePort()), service);
|
||||
} catch (Exception e) {
|
||||
throw new ConfigurationException("Unable to register RMI cluster service");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getName() {
|
||||
return _name;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean start() {
|
||||
return true;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean stop() {
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
|
@ -1,92 +0,0 @@
|
|||
package com.cloud.cluster;
|
||||
|
||||
import java.rmi.RemoteException;
|
||||
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import com.cloud.agent.Listener;
|
||||
import com.cloud.agent.api.Answer;
|
||||
import com.cloud.agent.api.Command;
|
||||
import com.cloud.cluster.ClusterManager;
|
||||
import com.cloud.cluster.ClusterService;
|
||||
import com.cloud.exception.AgentUnavailableException;
|
||||
import com.cloud.exception.OperationTimedoutException;
|
||||
import com.cloud.serializer.GsonHelper;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
public class ClusterServiceRMIImpl implements ClusterService {
|
||||
private static final Logger s_logger = Logger.getLogger(ClusterServiceRMIImpl.class);
|
||||
|
||||
private ClusterManager manager;
|
||||
private Gson gson;
|
||||
|
||||
public ClusterServiceRMIImpl() {
|
||||
gson = GsonHelper.getBuilder().create();
|
||||
}
|
||||
|
||||
public ClusterServiceRMIImpl(ClusterManager manager) {
|
||||
this.manager = manager;
|
||||
|
||||
gson = GsonHelper.getBuilder().create();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String execute(String callingPeer, long agentId, String gsonPackage, boolean stopOnError) throws RemoteException {
|
||||
|
||||
if(s_logger.isInfoEnabled())
|
||||
s_logger.info("Execute command forwarded from peer : " + callingPeer);
|
||||
|
||||
Command [] cmds = null;
|
||||
try {
|
||||
cmds = gson.fromJson(gsonPackage, Command[].class);
|
||||
} catch(Throwable e) {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
try {
|
||||
Answer[] answers = manager.sendToAgent(agentId, cmds, stopOnError);
|
||||
if(answers != null)
|
||||
return gson.toJson(answers);
|
||||
} catch (AgentUnavailableException e) {
|
||||
s_logger.warn("Agent unavailable", e);
|
||||
} catch (OperationTimedoutException e) {
|
||||
s_logger.warn("Timed Out", e);
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@Override
|
||||
public long executeAsync(String callingPeer, long agentId, String gsonPackage, boolean stopOnError) throws RemoteException {
|
||||
|
||||
if(s_logger.isInfoEnabled())
|
||||
s_logger.info("Execute Async command forwarded from peer : " + callingPeer);
|
||||
|
||||
Command [] cmds = null;
|
||||
try {
|
||||
cmds = gson.fromJson(gsonPackage, Command[].class);
|
||||
} catch(Throwable e) {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
Listener listener = new ClusterAsyncExectuionListener(manager, callingPeer);
|
||||
try {
|
||||
return manager.sendToAgent(agentId, cmds, stopOnError, listener);
|
||||
} catch (AgentUnavailableException e) {
|
||||
s_logger.warn("Agent is unavailable", e);
|
||||
return -1;
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean onAsyncResult(String executingPeer, long agentId, long seq, String gsonPackage) throws RemoteException {
|
||||
Answer[] answers = null;
|
||||
try {
|
||||
answers = gson.fromJson(gsonPackage, Answer[].class);
|
||||
} catch(Throwable e) {
|
||||
assert(false);
|
||||
}
|
||||
|
||||
return manager.onAsyncResult(executingPeer, agentId, seq, answers);
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -20,8 +20,6 @@ import com.cloud.agent.api.Answer;
|
|||
import com.cloud.agent.api.ChangeAgentAnswer;
|
||||
import com.cloud.agent.api.ChangeAgentCommand;
|
||||
import com.cloud.agent.api.Command;
|
||||
import com.cloud.cluster.ClusterManager;
|
||||
import com.cloud.cluster.RemoteMethodConstants;
|
||||
import com.cloud.exception.AgentUnavailableException;
|
||||
import com.cloud.exception.OperationTimedoutException;
|
||||
import com.cloud.serializer.GsonHelper;
|
||||
|
|
@ -122,6 +120,10 @@ public class ClusterServiceServletHttpHandler implements HttpRequestHandler {
|
|||
responseContent = handleAsyncResultMethodCall(req);
|
||||
break;
|
||||
|
||||
case RemoteMethodConstants.METHOD_PING :
|
||||
responseContent = handlePingMethodCall(req);
|
||||
break;
|
||||
|
||||
case RemoteMethodConstants.METHOD_UNKNOWN :
|
||||
default :
|
||||
assert(false);
|
||||
|
|
@ -278,4 +280,13 @@ public class ClusterServiceServletHttpHandler implements HttpRequestHandler {
|
|||
|
||||
return "recurring=false";
|
||||
}
|
||||
|
||||
private String handlePingMethodCall(HttpRequest req) {
|
||||
String callingPeer = (String)req.getParams().getParameter("callingPeer");
|
||||
|
||||
if(s_logger.isDebugEnabled())
|
||||
s_logger.debug("Handle ping request from " + callingPeer);
|
||||
|
||||
return "true";
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -9,8 +9,6 @@ import org.apache.commons.httpclient.HttpStatus;
|
|||
import org.apache.commons.httpclient.methods.PostMethod;
|
||||
import org.apache.log4j.Logger;
|
||||
|
||||
import com.cloud.cluster.ClusterService;
|
||||
import com.cloud.cluster.RemoteMethodConstants;
|
||||
import com.cloud.serializer.GsonHelper;
|
||||
import com.google.gson.Gson;
|
||||
|
||||
|
|
@ -105,6 +103,21 @@ public class ClusterServiceServletImpl implements ClusterService {
|
|||
return false;
|
||||
}
|
||||
|
||||
public boolean ping(String callingPeer) throws RemoteException {
|
||||
if(s_logger.isDebugEnabled())
|
||||
s_logger.debug("Ping at " + serviceUrl);
|
||||
|
||||
HttpClient client = new HttpClient();
|
||||
PostMethod method = new PostMethod(serviceUrl);
|
||||
|
||||
method.addParameter("method", Integer.toString(RemoteMethodConstants.METHOD_PING));
|
||||
method.addParameter("callingPeer", callingPeer);
|
||||
String returnVal = executePostMethod(client, method);
|
||||
if("true".equalsIgnoreCase(returnVal))
|
||||
return true;
|
||||
return false;
|
||||
}
|
||||
|
||||
private String executePostMethod(HttpClient client, PostMethod method) {
|
||||
int response = 0;
|
||||
String result = null;
|
||||
|
|
|
|||
|
|
@ -81,7 +81,7 @@ public class DummyClusterManagerImpl implements ClusterManager {
|
|||
return ClusterManager.DEFAULT_HEARTBEAT_INTERVAL;
|
||||
}
|
||||
|
||||
public long getId() {
|
||||
public long getManagementNodeId() {
|
||||
return _id;
|
||||
}
|
||||
|
||||
|
|
@ -102,9 +102,13 @@ public class DummyClusterManagerImpl implements ClusterManager {
|
|||
return _clusterNodeIP;
|
||||
}
|
||||
|
||||
public boolean isManageemnNodeAlive(long msid) {
|
||||
public boolean isManagementNodeAlive(long msid) {
|
||||
return true;
|
||||
}
|
||||
|
||||
public boolean pingManagementNode(long msid) {
|
||||
return false;
|
||||
}
|
||||
|
||||
public String getPeerName(long agentHostId) {
|
||||
throw new CloudRuntimeException("Unsupported feature");
|
||||
|
|
|
|||
|
|
@ -22,5 +22,6 @@ public interface RemoteMethodConstants {
|
|||
public static final int METHOD_UNKNOWN = 0;
|
||||
public static final int METHOD_EXECUTE = 1;
|
||||
public static final int METHOD_EXECUTE_ASYNC = 2;
|
||||
public static final int METHOD_ASYNC_RESULT = 3;
|
||||
public static final int METHOD_ASYNC_RESULT = 3;
|
||||
public static final int METHOD_PING = 4;
|
||||
}
|
||||
|
|
|
|||
|
|
@ -18,9 +18,9 @@
|
|||
|
||||
package com.cloud.cluster.dao;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import java.util.Date;
|
||||
import java.util.List;
|
||||
|
||||
import com.cloud.cluster.ManagementServerHostVO;
|
||||
import com.cloud.utils.db.GenericDao;
|
||||
|
||||
|
|
@ -30,5 +30,6 @@ public interface ManagementServerHostDao extends GenericDao<ManagementServerHost
|
|||
void update(long id, Date lastUpdate);
|
||||
|
||||
List<ManagementServerHostVO> getActiveList(Date cutTime);
|
||||
List<ManagementServerHostVO> getInactiveList(Date cutTime);
|
||||
void increaseAlertCount(long id);
|
||||
}
|
||||
|
|
|
|||
|
|
@ -39,6 +39,8 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
|
|||
private static final Logger s_logger = Logger.getLogger(ManagementServerHostDaoImpl.class);
|
||||
|
||||
private final SearchBuilder<ManagementServerHostVO> MsIdSearch;
|
||||
private final SearchBuilder<ManagementServerHostVO> ActiveSearch;
|
||||
private final SearchBuilder<ManagementServerHostVO> InactiveSearch;
|
||||
|
||||
public ManagementServerHostVO findByMsid(long msid) {
|
||||
SearchCriteria<ManagementServerHostVO> sc = MsIdSearch.create();
|
||||
|
|
@ -94,15 +96,18 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
|
|||
}
|
||||
|
||||
public List<ManagementServerHostVO> getActiveList(Date cutTime) {
|
||||
SearchBuilder<ManagementServerHostVO> activeSearch = createSearchBuilder();
|
||||
activeSearch.and("lastUpdateTime", activeSearch.entity().getLastUpdateTime(), SearchCriteria.Op.GT);
|
||||
activeSearch.and("removed", activeSearch.entity().getRemoved(), SearchCriteria.Op.NULL);
|
||||
|
||||
SearchCriteria<ManagementServerHostVO> sc = activeSearch.create();
|
||||
SearchCriteria<ManagementServerHostVO> sc = ActiveSearch.create();
|
||||
sc.setParameters("lastUpdateTime", cutTime);
|
||||
|
||||
return listIncludingRemovedBy(sc);
|
||||
}
|
||||
}
|
||||
|
||||
public List<ManagementServerHostVO> getInactiveList(Date cutTime) {
|
||||
SearchCriteria<ManagementServerHostVO> sc = InactiveSearch.create();
|
||||
sc.setParameters("lastUpdateTime", cutTime);
|
||||
|
||||
return listIncludingRemovedBy(sc);
|
||||
}
|
||||
|
||||
public void increaseAlertCount(long id) {
|
||||
Transaction txn = Transaction.currentTxn();
|
||||
|
|
@ -124,6 +129,16 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
|
|||
protected ManagementServerHostDaoImpl() {
|
||||
MsIdSearch = createSearchBuilder();
|
||||
MsIdSearch.and("msid", MsIdSearch.entity().getMsid(), SearchCriteria.Op.EQ);
|
||||
MsIdSearch.done();
|
||||
MsIdSearch.done();
|
||||
|
||||
ActiveSearch = createSearchBuilder();
|
||||
ActiveSearch.and("lastUpdateTime", ActiveSearch.entity().getLastUpdateTime(), SearchCriteria.Op.GT);
|
||||
ActiveSearch.and("removed", ActiveSearch.entity().getRemoved(), SearchCriteria.Op.NULL);
|
||||
ActiveSearch.done();
|
||||
|
||||
InactiveSearch = createSearchBuilder();
|
||||
InactiveSearch.and("lastUpdateTime", InactiveSearch.entity().getLastUpdateTime(), SearchCriteria.Op.LTEQ);
|
||||
InactiveSearch.and("removed", InactiveSearch.entity().getRemoved(), SearchCriteria.Op.NULL);
|
||||
InactiveSearch.done();
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -45,7 +45,7 @@ public class StackMaidManagerImpl implements StackMaidManager {
|
|||
if (s_logger.isInfoEnabled())
|
||||
s_logger.info("Start configuring StackMaidManager : " + name);
|
||||
|
||||
StackMaid.init(_clusterMgr.getId());
|
||||
StackMaid.init(_clusterMgr.getManagementNodeId());
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -88,7 +88,7 @@ public class StackMaidManagerImpl implements StackMaidManager {
|
|||
|
||||
public boolean start() {
|
||||
try {
|
||||
List<StackMaidVO> l = _maidDao.listLeftoversByMsid(_clusterMgr.getId());
|
||||
List<StackMaidVO> l = _maidDao.listLeftoversByMsid(_clusterMgr.getManagementNodeId());
|
||||
cleanupLeftovers(l);
|
||||
} catch(Throwable e) {
|
||||
s_logger.error("Unexpected exception " + e.getMessage(), e);
|
||||
|
|
|
|||
|
|
@ -398,7 +398,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
|
|||
_operationTimeout = NumbersUtil.parseInt(params.get(Config.Wait.key()), 1800) * 2;
|
||||
|
||||
_executor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("Vm-Operations-Cleanup"));
|
||||
_nodeId = _clusterMgr.getId();
|
||||
_nodeId = _clusterMgr.getManagementNodeId();
|
||||
|
||||
_agentMgr.registerForHostEvents(this, true, true, true);
|
||||
|
||||
|
|
|
|||
Loading…
Reference in New Issue