Bug 10197: do not check timeout against cluster which is not managed

This commit is contained in:
anthony 2011-08-01 16:59:54 -07:00
parent 5ba6d3fd3a
commit 7d02ed344e
4 changed files with 38 additions and 44 deletions

View File

@ -1085,7 +1085,7 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager {
_hostDao.disconnect(host, event, _nodeId);
host = _hostDao.findById(host.getId());
if (event.equals(Event.PrepareUnmanaged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) {
if (!event.equals(Event.PrepareUnmanaged) && (host.getStatus() == Status.Alert || host.getStatus() == Status.Down)) {
_haMgr.scheduleRestartForVmsOnHost(host, investigate);
}

View File

@ -33,6 +33,7 @@ import com.cloud.agent.api.StartupCommand;
import com.cloud.alert.AlertManager;
import com.cloud.dc.DataCenterVO;
import com.cloud.dc.HostPodVO;
import com.cloud.dc.dao.ClusterDao;
import com.cloud.dc.dao.DataCenterDao;
import com.cloud.dc.dao.HostPodDao;
import com.cloud.host.Host;
@ -40,6 +41,7 @@ import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.host.Status.Event;
import com.cloud.host.dao.HostDao;
import com.cloud.utils.component.Inject;
import com.cloud.utils.db.ConnectionConcierge;
import com.cloud.utils.db.DB;
import com.cloud.utils.db.GlobalLock;
@ -60,6 +62,8 @@ public class AgentMonitor extends Thread implements Listener {
private AlertManager _alertMgr;
private long _msId;
private ConnectionConcierge _concierge;
@Inject
ClusterDao _clusterDao;
// No-argument constructor with an empty body; it performs no initialization itself.
protected AgentMonitor() {
}
@ -117,13 +121,6 @@ public class AgentMonitor extends Thread implements Listener {
}
for (HostVO host : hosts) {
if (host.getType().equals(Host.Type.ExternalFirewall) ||
host.getType().equals(Host.Type.ExternalLoadBalancer) ||
host.getType().equals(Host.Type.TrafficMonitor) ||
host.getType().equals(Host.Type.SecondaryStorage)) {
continue;
}
if (host.getManagementServerId() == null || host.getManagementServerId() == _msId) {
if (s_logger.isInfoEnabled()) {
s_logger.info("Asking agent mgr to investgate why host " + host.getId() + " is behind on ping. last ping time: " + host.getLastPinged());
@ -137,10 +134,10 @@ public class AgentMonitor extends Thread implements Listener {
long hostId = host.getId();
DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
HostPodVO podVO = _podDao.findById(host.getPodId());
String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName();
String hostDesc = "name: " + host.getName() + " (id:" + hostId + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName();
if (host.getType() != Host.Type.Storage) {
List<VMInstanceVO> vos = _vmDao.listByHostId(host.getId());
List<VMInstanceVO> vos = _vmDao.listByHostId(hostId);
if (vos.size() == 0) {
_alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Migration Complete for host " + hostDesc, "Host [" + hostDesc + "] is ready for maintenance");
_hostDao.updateStatus(host, Event.PreparationComplete, _msId);

View File

@ -109,14 +109,6 @@ public interface HostDao extends GenericDao<HostVO, Long> {
*/
List<HostVO> listByType(Type type);
/**
* Find hosts of a single type that have not responded to a ping, regardless of state.
* NOTE(review): this declaration sits in a hunk whose header shrinks the region
* (14 lines to 6), so it appears to be removed by this commit -- confirm before relying on it.
* @param timeout threshold for the host's last-pinged value; the implementation shown in
*        this change matches hosts whose last-pinged value is strictly less than it
*        (units are not visible here -- TODO confirm)
* @param type the host type to match
* @return the hosts that satisfy both conditions
*/
List<HostVO> findLostHosts2(long timeout, Type type);
/**
* update the host and changes the status depending on the Event and
* the current status. If the status changed between

View File

@ -73,8 +73,6 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
protected final SearchBuilder<HostVO> IdStatusSearch;
protected final SearchBuilder<HostVO> TypeDcSearch;
protected final SearchBuilder<HostVO> TypeDcStatusSearch;
protected final SearchBuilder<HostVO> LastPingedSearch;
protected final SearchBuilder<HostVO> LastPingedSearch2;
protected final SearchBuilder<HostVO> MsStatusSearch;
protected final SearchBuilder<HostVO> DcPrivateIpAddressSearch;
protected final SearchBuilder<HostVO> DcStorageIpAddressSearch;
@ -104,7 +102,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
protected final GenericSearchBuilder<HostVO, Long> HostsInStatusSearch;
protected final GenericSearchBuilder<HostVO, Long> CountRoutingByDc;
protected final SearchBuilder<HostTransferMapVO> HostTransferSearch;
protected final SearchBuilder<ClusterVO> ClusterManagedSearch;
protected SearchBuilder<ClusterVO> ClusterManagedSearch;
protected final SearchBuilder<HostVO> RoutingSearch;
protected final Attribute _statusAttr;
@ -133,16 +131,6 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
TypePodDcStatusSearch.and("status", entity.getStatus(), SearchCriteria.Op.EQ);
TypePodDcStatusSearch.done();
LastPingedSearch = createSearchBuilder();
LastPingedSearch.and("ping", LastPingedSearch.entity().getLastPinged(), SearchCriteria.Op.LT);
LastPingedSearch.and("state", LastPingedSearch.entity().getStatus(), SearchCriteria.Op.IN);
LastPingedSearch.done();
LastPingedSearch2 = createSearchBuilder();
LastPingedSearch2.and("ping", LastPingedSearch2.entity().getLastPinged(), SearchCriteria.Op.LT);
LastPingedSearch2.and("type", LastPingedSearch2.entity().getType(), SearchCriteria.Op.EQ);
LastPingedSearch2.done();
MsStatusSearch = createSearchBuilder();
MsStatusSearch.and("ms", MsStatusSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ);
MsStatusSearch.and("statuses", MsStatusSearch.entity().getStatus(), SearchCriteria.Op.NIN);
@ -382,8 +370,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
@Override @DB
public List<HostVO> findAndUpdateDirectAgentToLoad(long lastPingSecondsAfter, Long limit, long managementServerId) {
Transaction txn = Transaction.currentTxn();
txn.start();
txn.start();
SearchCriteria<HostVO> sc = UnmanagedDirectConnectSearch.create();
sc.setParameters("lastPinged", lastPingSecondsAfter);
sc.setParameters("statuses", Status.ErrorInMaintenance, Status.Maintenance, Status.PrepareForMaintenance);
@ -682,12 +669,36 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
return findOneBy(sc);
}
/**
* Finds hosts whose last ping is older than the given threshold.
*
* NOTE(review): this span looks like a diff rendering with the +/- markers stripped.
* The SearchCriteria lines up to and including {@code return listBy(sc);} appear to be
* the previous implementation, and everything after them the replacement. Read literally,
* the statements after that first return would be unreachable -- confirm against the
* real file.
*
* The raw-SQL variant selects host ids where last_ping is below the threshold, status is
* one of Up/Updating/Disconnected/Connecting, the type is none of the listed
* external/secondary-storage types, and the host either has no cluster or belongs to a
* cluster whose managed_state is 'Managed'.
*
* @param timeout threshold compared against last_ping with a strict less-than
*        (units are not visible here -- TODO confirm)
* @return the matching hosts; if the query fails, the error is logged and whatever was
*         collected so far (possibly an empty list) is returned
*/
@DB
@Override
public List<HostVO> findLostHosts(long timeout) {
// Apparent pre-change body: filter on last ping and the four "connected-ish" statuses only.
SearchCriteria<HostVO> sc = LastPingedSearch.create();
sc.setParameters("ping", timeout);
sc.setParameters("state", Status.Up, Status.Updating, Status.Disconnected, Status.Connecting);
return listBy(sc);
// Apparent post-change body: raw SQL so the cluster's managed_state can be joined in.
Transaction txn = Transaction.currentTxn();
PreparedStatement pstmt = null;
List<HostVO> result = new ArrayList<HostVO>();
ResultSet rs = null;
try {
// Left join keeps hosts with no cluster row; the final predicate then admits them via "cluster_id is null".
String sql = "select h.id from host h left join cluster c on h.cluster_id=c.id where h.last_ping < ? and h.status in ('Up', 'Updating', 'Disconnected', 'Connecting') and h.type not in ('ExternalFirewall', 'ExternalLoadBalancer', 'TrafficMonitor', 'SecondaryStorage', 'LocalSecondaryStorage') and (h.cluster_id is null or c.managed_state = 'Managed') ;" ;
pstmt = txn.prepareStatement(sql);
pstmt.setLong(1, timeout);
rs = pstmt.executeQuery();
while (rs.next()) {
long id = rs.getLong(1); //ID column
// Only the id is selected, so each row triggers a separate findById lookup.
result.add(findById(id));
}
} catch (Exception e) {
// Any failure is logged and not rethrown; the method falls through and returns `result` as built so far.
s_logger.warn("Exception: ", e);
} finally {
try {
if (rs != null) {
rs.close();
}
// NOTE(review): if rs.close() throws, this close is skipped and the statement stays open.
if (pstmt != null) {
pstmt.close();
}
} catch (SQLException e) {
// NOTE(review): close failures are silently discarded here (no log, no rethrow).
}
}
return result;
}
@Override
@ -697,13 +708,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
return listBy(sc);
}
/**
* Lists hosts of the given type whose last-pinged value is below the threshold,
* with no filter on host status.
*
* NOTE(review): the interface hunk in this same change appears to drop the findLostHosts2
* declaration, and the LastPingedSearch2 builder is in a hunk that shrinks, so this method
* looks like the removed side of the diff -- confirm against the real file.
*
* @param timeout threshold bound to the "ping" parameter, which LastPingedSearch2
*        compares against the host's last-pinged value with a less-than operator
* @param type host type; its toString() value is bound to the "type" equality parameter
* @return the hosts matching both conditions
*/
@Override
public List<HostVO> findLostHosts2(long timeout, Type type) {
SearchCriteria<HostVO> sc = LastPingedSearch2.create();
sc.setParameters("ping", timeout);
sc.setParameters("type", type.toString());
return listBy(sc);
}
@Override
public List<HostVO> listByDataCenter(long dcId) {