bug 10791: add data integrity check upon management server startup

This commit is contained in:
Kelven Yang 2011-07-21 17:07:14 -07:00
parent 4ce137ef2e
commit 95b48ba222
5 changed files with 42 additions and 0 deletions

View File

@ -295,6 +295,8 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
}
_nodeId = ManagementServerNode.getManagementServerId();
s_logger.info("Configuring AgentManagerImpl. management server node id(msid): " + _nodeId);
_hostDao.markHostsAsDisconnected(_nodeId);

View File

@ -113,6 +113,8 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
_peers = new HashMap<String, SocketChannel>(7);
_sslEngines = new HashMap<String, SSLEngine>(7);
_nodeId = _clusterMgr.getManagementNodeId();
s_logger.info("Configuring ClusterAgentManagerImpl. management server node id(msid): " + _nodeId);
ConfigurationDao configDao = ComponentLocator.getCurrentLocator().getDao(ConfigurationDao.class);
Map<String, String> params = configDao.getConfiguration(xmlParams);

View File

@ -778,6 +778,20 @@ public class ClusterManagerImpl implements ClusterManager {
// missed cleanup
Date cutTime = DateUtil.currentGMTTime();
List<ManagementServerHostVO> inactiveList = _mshostDao.getInactiveList(new Date(cutTime.getTime() - heartbeatThreshold));
// The host table has no foreign key constraint enforcing mgmt_server_id integrity, so when a user manually
// removes records from the mshost table, orphan mgmt_server_id references are left behind in the host table.
List<Long> orphanList = _mshostDao.listOrphanMsids();
if(orphanList.size() > 0) {
for(Long orphanMsid : orphanList) {
// construct fake ManagementServerHostVO based on orphan MSID
s_logger.info("Add orphan management server msid found in host table to initial clustering notification, orphan msid: " + orphanMsid);
inactiveList.add(new ManagementServerHostVO(orphanMsid, 0, "orphan", 0, new Date()));
}
} else {
s_logger.info("We are good, no orphan management server msid in host table is found");
}
if(inactiveList.size() > 0) {
this.queueNotification(new ClusterManagerMessage(ClusterManagerMessage.MessageType.nodeRemoved, inactiveList));
}

View File

@ -43,4 +43,5 @@ public interface ManagementServerHostDao extends GenericDao<ManagementServerHost
void update(long id, long runId, State state, Date lastUpdate);
List<ManagementServerHostVO> listBy(ManagementServerHost.State...states);
/**
 * Lists orphan management server ids: mgmt_server_id values that are referenced by rows in the
 * host table but have no matching msid in the mshost table.
 *
 * @return the orphan management server ids
 */
public List<Long> listOrphanMsids();
}

View File

@ -19,7 +19,9 @@
package com.cloud.cluster.dao;
import java.sql.PreparedStatement;
import java.sql.ResultSet;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.TimeZone;
@ -231,5 +233,26 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
sc.setParameters("state", (Object[]) states);
return listBy(sc);
}
/**
 * Finds management server ids (mgmt_server_id) that are referenced by rows in the host table
 * but have no matching msid in the mshost table.
 *
 * @return the orphan management server ids; possibly empty, never null
 * @throws CloudRuntimeException if the query cannot be prepared or executed
 */
@Override
public List<Long> listOrphanMsids() {
    // Group host rows by mgmt_server_id, then keep only the ids that are absent from mshost.
    final String sql =
        "select t.mgmt_server_id from (select mgmt_server_id, count(*) as count from host group by mgmt_server_id) as t WHERE t.count > 0 AND t.mgmt_server_id NOT IN (select msid from mshost)";

    List<Long> orphanList = new ArrayList<Long>();
    Transaction txn = Transaction.currentTxn();
    PreparedStatement pstmt = null;
    try {
        pstmt = txn.prepareAutoCloseStatement(sql);
        ResultSet rs = pstmt.executeQuery();
        while (rs.next()) {
            orphanList.add(rs.getLong(1));
        }
    } catch (SQLException e) {
        // pstmt is still null if preparing the statement is what failed; calling toString() on it
        // would raise a NullPointerException and hide the real SQLException, so fall back to the SQL text.
        String failedStatement = (pstmt != null) ? pstmt.toString() : sql;
        throw new CloudRuntimeException("DB exception on " + failedStatement, e);
    }
    return orphanList;
}
}