From 8d9254c2bb2bdce807a2a207fb8c41e8428f8531 Mon Sep 17 00:00:00 2001 From: Alex Huang Date: Mon, 9 May 2011 13:15:06 -0700 Subject: [PATCH] propagate commit 213bfe70520b780beb2c4bb12c4ff59dc1c36f86 e108396fd11172f4fc9bcdc076d972a28391a322 --- core/src/com/cloud/host/HostVO.java | 2 +- core/src/com/cloud/host/dao/HostDao.java | 2 +- core/src/com/cloud/host/dao/HostDaoImpl.java | 331 +- .../cloud/agent/manager/AgentManagerImpl.java | 4193 +++++++++-------- .../src/com/cloud/configuration/Config.java | 3 +- 5 files changed, 2463 insertions(+), 2068 deletions(-) diff --git a/core/src/com/cloud/host/HostVO.java b/core/src/com/cloud/host/HostVO.java index 02fbbb55159..d8c54f9374a 100644 --- a/core/src/com/cloud/host/HostVO.java +++ b/core/src/com/cloud/host/HostVO.java @@ -636,7 +636,7 @@ public class HostVO implements Host { @Override public String toString() { - return new StringBuilder(type.toString()).append("-").append(Long.toString(id)).append("-").append(name).append("-").append(privateIpAddress).toString(); + return new StringBuilder("Host[").append("-").append(id).append("-").append(type).append("]").toString(); } public void setHypervisorType(Hypervisor.Type hypervisorType) { diff --git a/core/src/com/cloud/host/dao/HostDao.java b/core/src/com/cloud/host/dao/HostDao.java index 9e329fe1ff8..cd968ff75a2 100644 --- a/core/src/com/cloud/host/dao/HostDao.java +++ b/core/src/com/cloud/host/dao/HostDao.java @@ -68,7 +68,7 @@ public interface HostDao extends GenericDao { List findDirectlyConnectedHosts(); List findDirectAgentToLoad(long msid, long lastPingSecondsAfter, Long limit); - + boolean directConnect(HostVO host, long msId); /** * Mark the host as disconnected if it is in one of these states. 
diff --git a/core/src/com/cloud/host/dao/HostDaoImpl.java b/core/src/com/cloud/host/dao/HostDaoImpl.java index 13587acddff..09a0828aee4 100644 --- a/core/src/com/cloud/host/dao/HostDaoImpl.java +++ b/core/src/com/cloud/host/dao/HostDaoImpl.java @@ -1,8 +1,8 @@ /** * Copyright (C) 2010 Cloud.com, Inc. All rights reserved. - * + * * This software is licensed under the GNU General Public License v3 or later. - * + * * It is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or any later version. @@ -10,10 +10,10 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program. If not, see . - * + * */ package com.cloud.host.dao; @@ -34,9 +34,9 @@ import javax.persistence.TableGenerator; import org.apache.log4j.Logger; import com.cloud.host.Host; +import com.cloud.host.Host.Type; import com.cloud.host.HostVO; import com.cloud.host.Status; -import com.cloud.host.Host.Type; import com.cloud.host.Status.Event; import com.cloud.info.RunningHostCountInfo; import com.cloud.utils.DateUtil; @@ -49,13 +49,13 @@ import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; import com.cloud.utils.db.Transaction; import com.cloud.utils.db.UpdateBuilder; -import com.cloud.utils.db.SearchCriteria.Func; -@Local(value = { HostDao.class }) @DB(txn=false) -@TableGenerator(name="host_req_sq", table="host", pkColumnName="id", valueColumnName="sequence", allocationSize=1) +@Local(value = { HostDao.class }) +@DB(txn = false) +@TableGenerator(name = "host_req_sq", table = "host", pkColumnName = "id", valueColumnName = "sequence", allocationSize = 1) public class HostDaoImpl extends GenericDaoBase implements HostDao { private 
static final Logger s_logger = Logger.getLogger(HostDaoImpl.class); - protected final VmHostDaoImpl _vmHostDao; + protected final VmHostDaoImpl _vmHostDao; protected final SearchBuilder TypePodDcStatusSearch; @@ -67,6 +67,7 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao protected final SearchBuilder MsStatusSearch; protected final SearchBuilder DcPrivateIpAddressSearch; protected final SearchBuilder DcStorageIpAddressSearch; + protected final SearchBuilder DirectConnectSearch; protected final SearchBuilder GuidSearch; protected final SearchBuilder DcSearch; @@ -79,24 +80,24 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao protected final SearchBuilder UnmanagedDirectConnectSearch; protected final SearchBuilder MaintenanceCountSearch; protected final SearchBuilder ClusterSearch; - + protected final Attribute _statusAttr; protected final Attribute _msIdAttr; protected final Attribute _pingTimeAttr; protected final Attribute _sequenceAttr; - + protected final DetailsDaoImpl _detailsDao = ComponentLocator.inject(DetailsDaoImpl.class); protected final HostTagsDaoImpl _hostTagsDao = ComponentLocator.inject(HostTagsDaoImpl.class); - + public HostDaoImpl() { _vmHostDao = ComponentLocator.inject(VmHostDaoImpl.class); - + MaintenanceCountSearch = createSearchBuilder(); MaintenanceCountSearch.and("cluster", MaintenanceCountSearch.entity().getClusterId(), SearchCriteria.Op.EQ); MaintenanceCountSearch.and("status", MaintenanceCountSearch.entity().getStatus(), SearchCriteria.Op.IN); MaintenanceCountSearch.done(); - + TypePodDcStatusSearch = createSearchBuilder(); HostVO entity = TypePodDcStatusSearch.entity(); TypePodDcStatusSearch.and("type", entity.getType(), SearchCriteria.Op.EQ); @@ -110,38 +111,38 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao LastPingedSearch.and("ping", LastPingedSearch.entity().getLastPinged(), SearchCriteria.Op.LT); LastPingedSearch.and("state", LastPingedSearch.entity().getStatus(), 
SearchCriteria.Op.IN); LastPingedSearch.done(); - + LastPingedSearch2 = createSearchBuilder(); LastPingedSearch2.and("ping", LastPingedSearch2.entity().getLastPinged(), SearchCriteria.Op.LT); LastPingedSearch2.and("type", LastPingedSearch2.entity().getType(), SearchCriteria.Op.EQ); LastPingedSearch2.done(); - + MsStatusSearch = createSearchBuilder(); MsStatusSearch.and("ms", MsStatusSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ); MsStatusSearch.and("statuses", MsStatusSearch.entity().getStatus(), SearchCriteria.Op.IN); MsStatusSearch.done(); - + TypeDcSearch = createSearchBuilder(); TypeDcSearch.and("type", TypeDcSearch.entity().getType(), SearchCriteria.Op.EQ); TypeDcSearch.and("dc", TypeDcSearch.entity().getDataCenterId(), SearchCriteria.Op.EQ); TypeDcSearch.done(); - + TypeDcStatusSearch = createSearchBuilder(); TypeDcStatusSearch.and("type", TypeDcStatusSearch.entity().getType(), SearchCriteria.Op.EQ); TypeDcStatusSearch.and("dc", TypeDcStatusSearch.entity().getDataCenterId(), SearchCriteria.Op.EQ); TypeDcStatusSearch.and("status", TypeDcStatusSearch.entity().getStatus(), SearchCriteria.Op.EQ); TypeDcStatusSearch.done(); - + IdStatusSearch = createSearchBuilder(); IdStatusSearch.and("id", IdStatusSearch.entity().getId(), SearchCriteria.Op.EQ); IdStatusSearch.and("states", IdStatusSearch.entity().getStatus(), SearchCriteria.Op.IN); IdStatusSearch.done(); - + DcPrivateIpAddressSearch = createSearchBuilder(); DcPrivateIpAddressSearch.and("privateIpAddress", DcPrivateIpAddressSearch.entity().getPrivateIpAddress(), SearchCriteria.Op.EQ); DcPrivateIpAddressSearch.and("dc", DcPrivateIpAddressSearch.entity().getDataCenterId(), SearchCriteria.Op.EQ); DcPrivateIpAddressSearch.done(); - + DcStorageIpAddressSearch = createSearchBuilder(); DcStorageIpAddressSearch.and("storageIpAddress", DcStorageIpAddressSearch.entity().getStorageIpAddress(), SearchCriteria.Op.EQ); DcStorageIpAddressSearch.and("dc", DcStorageIpAddressSearch.entity().getDataCenterId(), 
SearchCriteria.Op.EQ); @@ -150,124 +151,142 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao GuidSearch = createSearchBuilder(); GuidSearch.and("guid", GuidSearch.entity().getGuid(), SearchCriteria.Op.EQ); GuidSearch.done(); - + DcSearch = createSearchBuilder(); DcSearch.and("dc", DcSearch.entity().getDataCenterId(), SearchCriteria.Op.EQ); DcSearch.done(); - + ClusterSearch = createSearchBuilder(); ClusterSearch.and("cluster", ClusterSearch.entity().getClusterId(), SearchCriteria.Op.EQ); ClusterSearch.done(); - + PodSearch = createSearchBuilder(); PodSearch.and("pod", PodSearch.entity().getPodId(), SearchCriteria.Op.EQ); PodSearch.done(); - + TypeSearch = createSearchBuilder(); TypeSearch.and("type", TypeSearch.entity().getType(), SearchCriteria.Op.EQ); TypeSearch.done(); - - StatusSearch =createSearchBuilder(); + + StatusSearch = createSearchBuilder(); StatusSearch.and("status", StatusSearch.entity().getStatus(), SearchCriteria.Op.IN); StatusSearch.done(); - + NameLikeSearch = createSearchBuilder(); NameLikeSearch.and("name", NameLikeSearch.entity().getName(), SearchCriteria.Op.LIKE); NameLikeSearch.done(); - + SequenceSearch = createSearchBuilder(); SequenceSearch.and("id", SequenceSearch.entity().getId(), SearchCriteria.Op.EQ); -// SequenceSearch.addRetrieve("sequence", SequenceSearch.entity().getSequence()); + // SequenceSearch.addRetrieve("sequence", + // SequenceSearch.entity().getSequence()); SequenceSearch.done(); - + DirectlyConnectedSearch = createSearchBuilder(); DirectlyConnectedSearch.and("resource", DirectlyConnectedSearch.entity().getResource(), SearchCriteria.Op.NNULL); DirectlyConnectedSearch.done(); - + UnmanagedDirectConnectSearch = createSearchBuilder(); UnmanagedDirectConnectSearch.and("resource", UnmanagedDirectConnectSearch.entity().getResource(), SearchCriteria.Op.NNULL); UnmanagedDirectConnectSearch.and("server", UnmanagedDirectConnectSearch.entity().getManagementServerId(), SearchCriteria.Op.NULL); 
UnmanagedDirectConnectSearch.and("lastPinged", UnmanagedDirectConnectSearch.entity().getLastPinged(), SearchCriteria.Op.LTEQ); UnmanagedDirectConnectSearch.done(); + DirectConnectSearch = createSearchBuilder(); + DirectConnectSearch.and("server", DirectConnectSearch.entity().getManagementServerId(), SearchCriteria.Op.NULL); + DirectConnectSearch.and("resource", DirectConnectSearch.entity().getResource(), SearchCriteria.Op.NNULL); + DirectConnectSearch.and("id", DirectConnectSearch.entity().getId(), SearchCriteria.Op.EQ); + DirectConnectSearch.done(); + _statusAttr = _allAttributes.get("status"); _msIdAttr = _allAttributes.get("managementServerId"); _pingTimeAttr = _allAttributes.get("lastPinged"); _sequenceAttr = _allAttributes.get("sequence"); - + assert (_statusAttr != null && _msIdAttr != null && _pingTimeAttr != null && _sequenceAttr != null) : "Couldn't find one of these attributes"; } - + @Override public long countBy(long clusterId, Status... statuses) { SearchCriteria sc = MaintenanceCountSearch.create(); - - sc.setParameters("status", (Object[])statuses); + + sc.setParameters("status", (Object[]) statuses); sc.setParameters("cluster", clusterId); - + List hosts = listActiveBy(sc); return hosts.size(); } - + + @Override + public boolean directConnect(HostVO host, long msId) { + SearchCriteria sc = DirectConnectSearch.create(); + sc.setParameters("id", host.getId()); + host.setManagementServerId(msId); + host.setLastPinged(System.currentTimeMillis() >> 10); + UpdateBuilder ub = getUpdateBuilder(host); + ub.set(host, _statusAttr, Status.Connecting); + return update(host, sc) > 0; + } + @Override public HostVO findSecondaryStorageHost(long dcId) { - SearchCriteria sc = TypeDcSearch.create(); - sc.setParameters("type", Host.Type.SecondaryStorage); - sc.setParameters("dc", dcId); - List storageHosts = listActiveBy(sc); - - if (storageHosts == null || storageHosts.size() != 1) { - return null; - } else { - return storageHosts.get(0); - } + SearchCriteria sc = 
TypeDcSearch.create(); + sc.setParameters("type", Host.Type.SecondaryStorage); + sc.setParameters("dc", dcId); + List storageHosts = listActiveBy(sc); + + if (storageHosts == null || storageHosts.size() != 1) { + return null; + } else { + return storageHosts.get(0); + } } - + @Override public List listSecondaryStorageHosts(long dcId) { SearchCriteria sc = TypeDcSearch.create(); sc.setParameters("type", Host.Type.SecondaryStorage); sc.setParameters("dc", dcId); return listActiveBy(sc); - + } - + @Override public List listSecondaryStorageHosts() { - SearchCriteria sc = TypeSearch.create(); - sc.setParameters("type", Host.Type.SecondaryStorage); - List secondaryStorageHosts = listBy(sc); - - return secondaryStorageHosts; + SearchCriteria sc = TypeSearch.create(); + sc.setParameters("type", Host.Type.SecondaryStorage); + List secondaryStorageHosts = listBy(sc); + + return secondaryStorageHosts; } - + @Override public List findDirectlyConnectedHosts() { SearchCriteria sc = DirectlyConnectedSearch.create(); return search(sc, null); } - + @Override public List findDirectAgentToLoad(long msid, long lastPingSecondsAfter, Long limit) { - SearchCriteria sc = UnmanagedDirectConnectSearch.create(); - sc.setParameters("lastPinged", lastPingSecondsAfter); + SearchCriteria sc = UnmanagedDirectConnectSearch.create(); + sc.setParameters("lastPinged", lastPingSecondsAfter); return search(sc, new Filter(HostVO.class, "clusterId", true, 0L, limit)); } - + @Override public void markHostsAsDisconnected(long msId, Status... 
states) { SearchCriteria sc = MsStatusSearch.create(); sc.setParameters("ms", msId); - sc.setParameters("statuses", (Object[])states); - + sc.setParameters("statuses", (Object[]) states); + HostVO host = createForUpdate(); host.setManagementServerId(null); - host.setLastPinged((System.currentTimeMillis() >> 10) - ( 10 * 60 )); + host.setLastPinged((System.currentTimeMillis() >> 10) - (10 * 60)); host.setDisconnectedOn(new Date()); - + UpdateBuilder ub = getUpdateBuilder(host); ub.set(host, "status", Status.Disconnected); - + update(ub, sc, null); } @@ -286,16 +305,16 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao return listActiveBy(sc); } - + @Override public List listByCluster(long clusterId) { SearchCriteria sc = ClusterSearch.create(); - + sc.setParameters("cluster", clusterId); - + return listActiveBy(sc); } - + @Override public List listBy(Host.Type type, long dcId) { SearchCriteria sc = TypeDcStatusSearch.create(); @@ -305,50 +324,50 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao return listActiveBy(sc); } - + @Override public HostVO findByPrivateIpAddressInDataCenter(long dcId, String privateIpAddress) { SearchCriteria sc = DcPrivateIpAddressSearch.create(); sc.setParameters("dc", dcId); sc.setParameters("privateIpAddress", privateIpAddress); - + return findOneActiveBy(sc); } - + @Override public HostVO findByStorageIpAddressInDataCenter(long dcId, String privateIpAddress) { SearchCriteria sc = DcStorageIpAddressSearch.create(); sc.setParameters("dc", dcId); sc.setParameters("storageIpAddress", privateIpAddress); - + return findOneActiveBy(sc); } - + @Override public void loadDetails(HostVO host) { - Map details =_detailsDao.findDetails(host.getId()); + Map details = _detailsDao.findDetails(host.getId()); host.setDetails(details); } - + @Override - public void loadHostTags(HostVO host){ - List hostTags = _hostTagsDao.gethostTags(host.getId()); - host.setHostTags(hostTags); + public void loadHostTags(HostVO host) 
{ + List hostTags = _hostTagsDao.gethostTags(host.getId()); + host.setHostTags(hostTags); } - + @Override public boolean updateStatus(HostVO host, Event event, long msId) { Status oldStatus = host.getStatus(); long oldPingTime = host.getLastPinged(); Status newStatus = oldStatus.getNextStatus(event); - if ( host == null ) { + if (host == null) { return false; } - + if (newStatus == null) { return false; } - + SearchBuilder sb = createSearchBuilder(); sb.and("status", sb.entity().getStatus(), SearchCriteria.Op.EQ); sb.and("id", sb.entity().getId(), SearchCriteria.Op.EQ); @@ -359,16 +378,16 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao sb.closeParen(); } sb.done(); - + SearchCriteria sc = sb.create(); - + sc.setParameters("status", oldStatus); sc.setParameters("id", host.getId()); if (newStatus.checkManagementServer()) { - sc.setParameters("ping", oldPingTime); - sc.setParameters("msid", msId); + sc.setParameters("ping", oldPingTime); + sc.setParameters("msid", msId); } - + UpdateBuilder ub = getUpdateBuilder(host); ub.set(host, _statusAttr, newStatus); if (newStatus.updateManagementServer()) { @@ -377,47 +396,49 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao } else { ub.set(host, _msIdAttr, msId); } - if( event.equals(Event.Ping) || event.equals(Event.AgentConnected)) { - ub.set(host, _pingTimeAttr, System.currentTimeMillis() >> 10); - } + if (event.equals(Event.Ping) || event.equals(Event.AgentConnected)) { + ub.set(host, _pingTimeAttr, System.currentTimeMillis() >> 10); + } } - + int result = update(ub, sc, null); assert result <= 1 : "How can this update " + result + " rows? "; - + if (s_logger.isDebugEnabled() && result == 0) { - HostVO vo = findById(host.getId()); - assert vo != null : "How how how? : " + host.getId(); - - StringBuilder str = new StringBuilder("Unable to update host for event:").append(event.toString()); - str.append(". 
New=[status=").append(newStatus.toString()).append(":msid=").append(newStatus.lostConnection() ? "null" : msId).append(":lastpinged=").append(host.getLastPinged()).append("]"); - str.append("; Old=[status=").append(oldStatus.toString()).append(":msid=").append(msId).append(":lastpinged=").append(oldPingTime).append("]"); - str.append("; DB=[status=").append(vo.getStatus().toString()).append(":msid=").append(vo.getManagementServerId()).append(":lastpinged=").append(vo.getLastPinged()).append("]"); - s_logger.debug(str.toString()); + HostVO vo = findById(host.getId()); + assert vo != null : "How how how? : " + host.getId(); + + StringBuilder str = new StringBuilder("Unable to update host for event:").append(event.toString()); + str.append(". New=[status=").append(newStatus.toString()).append(":msid=").append(newStatus.lostConnection() ? "null" : msId).append(":lastpinged=").append(host.getLastPinged()) + .append("]"); + str.append("; Old=[status=").append(oldStatus.toString()).append(":msid=").append(msId).append(":lastpinged=").append(oldPingTime).append("]"); + str.append("; DB=[status=").append(vo.getStatus().toString()).append(":msid=").append(vo.getManagementServerId()).append(":lastpinged=").append(vo.getLastPinged()).append("]"); + s_logger.debug(str.toString()); } return result > 0; } - + @Override public boolean disconnect(HostVO host, Event event, long msId) { host.setDisconnectedOn(new Date()); - if(event!=null && event.equals(Event.Remove)) { + if (event != null && event.equals(Event.Remove)) { host.setGuid(null); host.setClusterId(null); } return updateStatus(host, event, msId); } - @Override @DB + @Override + @DB public boolean connect(HostVO host, long msId) { Transaction txn = Transaction.currentTxn(); long id = host.getId(); txn.start(); - + if (!updateStatus(host, Event.AgentConnected, msId)) { return false; } - + txn.commit(); return true; } @@ -432,13 +453,13 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao public List 
findLostHosts(long timeout) { SearchCriteria sc = LastPingedSearch.create(); sc.setParameters("ping", timeout); - sc.setParameters("state", Status.Up.toString(), Status.Updating.toString(), - Status.Disconnected.toString(), Status.Down.toString()); + sc.setParameters("state", Status.Up, Status.Updating, Status.Disconnected, Status.Connecting); return listActiveBy(sc); } - + + @Override public List findHostsLike(String hostName) { - SearchCriteria sc = NameLikeSearch.create(); + SearchCriteria sc = NameLikeSearch.create(); sc.setParameters("name", "%" + hostName + "%"); return listActiveBy(sc); } @@ -457,15 +478,16 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao return listActiveBy(sc); } + @Override public List listByHostPod(long podId) { SearchCriteria sc = PodSearch.create("pod", podId); return listActiveBy(sc); } - + @Override public List listByStatus(Status... status) { - SearchCriteria sc = StatusSearch.create(); - sc.setParameters("status", (Object[])status); + SearchCriteria sc = StatusSearch.create(); + sc.setParameters("status", (Object[]) status); return listActiveBy(sc); } @@ -497,7 +519,7 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao } _detailsDao.persist(host.getId(), details); } - + protected void saveHostTags(HostVO host) { List hostTags = host.getHostTags(); if (hostTags == null || (hostTags != null && hostTags.isEmpty())) { @@ -505,7 +527,7 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao } _hostTagsDao.persist(host.getId(), hostTags); } - + @Override public boolean configure(String name, Map params) throws ConfigurationException { if (!super.configure(name, params)) { @@ -515,62 +537,60 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao if (!_vmHostDao.configure("VM Host Operations Table", new HashMap())) { return false; } - + return true; } - - @Override @DB + + @Override + @DB public HostVO persist(HostVO host) { Transaction txn = Transaction.currentTxn(); 
txn.start(); - + HostVO dbHost = super.persist(host); saveDetails(host); loadDetails(dbHost); saveHostTags(host); loadHostTags(dbHost); - + txn.commit(); - + return dbHost; } - - @Override @DB + + @Override + @DB public boolean update(Long hostId, HostVO host) { Transaction txn = Transaction.currentTxn(); txn.start(); - + boolean persisted = super.update(hostId, host); if (!persisted) { return persisted; } - + saveDetails(host); saveHostTags(host); - + txn.commit(); - + return persisted; } - @Override @DB + @Override + @DB public List getRunningHostCounts(Date cutTime) { - String sql = "select * from (select h.data_center_id, h.type, count(*) as count from host as h INNER JOIN mshost as m ON h.mgmt_server_id=m.msid " + - "where h.status='Up' and h.type='Computing' and m.last_update > ? " + - "group by h.data_center_id, h.type " + - "UNION ALL " + - "select h.data_center_id, h.type, count(*) as count from host as h INNER JOIN mshost as m ON h.mgmt_server_id=m.msid " + - "where h.status='Up' and h.type='SecondaryStorage' and m.last_update > ? " + - "group by h.data_center_id, h.type " + - "UNION ALL " + - "select h.data_center_id, h.type, count(*) as count from host as h INNER JOIN mshost as m ON h.mgmt_server_id=m.msid " + - "where h.status='Up' and h.type='Routing' and m.last_update > ? " + - "group by h.data_center_id, h.type) as t " + - "ORDER by t.data_center_id, t.type"; + String sql = "select * from (select h.data_center_id, h.type, count(*) as count from host as h INNER JOIN mshost as m ON h.mgmt_server_id=m.msid " + + "where h.status='Up' and h.type='Computing' and m.last_update > ? " + "group by h.data_center_id, h.type " + "UNION ALL " + + "select h.data_center_id, h.type, count(*) as count from host as h INNER JOIN mshost as m ON h.mgmt_server_id=m.msid " + + "where h.status='Up' and h.type='SecondaryStorage' and m.last_update > ? 
" + "group by h.data_center_id, h.type " + "UNION ALL " + + "select h.data_center_id, h.type, count(*) as count from host as h INNER JOIN mshost as m ON h.mgmt_server_id=m.msid " + + "where h.status='Up' and h.type='Routing' and m.last_update > ? " + "group by h.data_center_id, h.type) as t " + "ORDER by t.data_center_id, t.type"; - ArrayList l = new ArrayList(); - - Transaction txn = Transaction.currentTxn();; + ArrayList l = new ArrayList(); + + Transaction txn = Transaction.currentTxn(); + ; PreparedStatement pstmt = null; try { pstmt = txn.prepareAutoCloseStatement(sql); @@ -578,15 +598,15 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao pstmt.setString(1, gmtCutTime); pstmt.setString(2, gmtCutTime); pstmt.setString(3, gmtCutTime); - + ResultSet rs = pstmt.executeQuery(); - while(rs.next()) { - RunningHostCountInfo info = new RunningHostCountInfo(); - info.setDcId(rs.getLong(1)); - info.setHostType(rs.getString(2)); - info.setCount(rs.getInt(3)); - - l.add(info); + while (rs.next()) { + RunningHostCountInfo info = new RunningHostCountInfo(); + info.setDcId(rs.getLong(1)); + info.setHostType(rs.getString(2)); + info.setCount(rs.getInt(3)); + + l.add(info); } } catch (SQLException e) { } catch (Throwable e) { @@ -599,13 +619,10 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao if (s_logger.isTraceEnabled()) { s_logger.trace("getNextSequence(), hostId: " + hostId); } - + TableGenerator tg = _tgs.get("host_req_sq"); assert tg != null : "how can this be wrong!"; - + return s_seqFetcher.getNextSequence(Long.class, tg, hostId); } } - - - diff --git a/server/src/com/cloud/agent/manager/AgentManagerImpl.java b/server/src/com/cloud/agent/manager/AgentManagerImpl.java index b699b26d46d..5d9afb84448 100755 --- a/server/src/com/cloud/agent/manager/AgentManagerImpl.java +++ b/server/src/com/cloud/agent/manager/AgentManagerImpl.java @@ -1,8 +1,8 @@ /** * Copyright (C) 2010 Cloud.com, Inc. All rights reserved. 
- * + * * This software is licensed under the GNU General Public License v3 or later. - * + * * It is free software: you can redistribute it and/or modify * it under the terms of the GNU General Public License as published by * the Free Software Foundation, either version 3 of the License, or any later version. @@ -10,10 +10,10 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * + * * You should have received a copy of the GNU General Public License * along with this program. If not, see . - * + * */ package com.cloud.agent.manager; @@ -101,6 +101,7 @@ import com.cloud.host.Status; import com.cloud.host.Status.Event; import com.cloud.host.dao.DetailsDao; import com.cloud.host.dao.HostDao; +import com.cloud.host.dao.HostTagsDao; import com.cloud.hypervisor.Hypervisor; import com.cloud.maid.StackMaid; import com.cloud.maint.UpgradeManager; @@ -127,6 +128,7 @@ import com.cloud.user.dao.UserStatisticsDao; import com.cloud.utils.ActionDelegate; import com.cloud.utils.NumbersUtil; import com.cloud.utils.Pair; +import com.cloud.utils.StringUtils; import com.cloud.utils.component.Adapters; import com.cloud.utils.component.ComponentLocator; import com.cloud.utils.component.Inject; @@ -141,18 +143,16 @@ import com.cloud.utils.nio.HandlerFactory; import com.cloud.utils.nio.Link; import com.cloud.utils.nio.NioServer; import com.cloud.utils.nio.Task; -import com.cloud.utils.StringUtils; import com.cloud.vm.State; import com.cloud.vm.UserVm; import com.cloud.vm.VMInstanceVO; import com.cloud.vm.VmCharacteristics; import com.cloud.vm.dao.VMInstanceDao; -import com.cloud.host.dao.HostTagsDao; /** * Implementation of the Agent Manager. This class controls the connection to * the agents. - * + * * @config {@table || Param Name | Description | Values | Default || || port | * port to listen on for agent connection. 
| Integer | 8250 || || * workers | # of worker threads | Integer | 5 || || router.template.id @@ -173,1982 +173,2359 @@ import com.cloud.host.dao.HostTagsDao; **/ @Local(value = { AgentManager.class }) public class AgentManagerImpl implements AgentManager, HandlerFactory { - private static final Logger s_logger = Logger.getLogger(AgentManagerImpl.class); + private static final Logger s_logger = Logger + .getLogger(AgentManagerImpl.class); - protected ConcurrentHashMap _agents = new ConcurrentHashMap(2047); - protected List> _hostMonitors = new ArrayList>(11); - protected List> _cmdMonitors = new ArrayList>(11); - protected int _monitorId = 0; + protected ConcurrentHashMap _agents = new ConcurrentHashMap( + 2047); + protected List> _hostMonitors = new ArrayList>( + 11); + protected List> _cmdMonitors = new ArrayList>( + 11); + protected int _monitorId = 0; - protected NioServer _connection; - @Inject protected HostDao _hostDao = null; - @Inject protected UserStatisticsDao _userStatsDao = null; - @Inject protected DataCenterDao _dcDao = null; - @Inject protected VlanDao _vlanDao = null; - @Inject protected DataCenterIpAddressDaoImpl _privateIPAddressDao = null; - @Inject protected IPAddressDao _publicIPAddressDao = null; - @Inject protected HostPodDao _podDao = null; - protected Adapters _hostAllocators = null; - protected Adapters _podAllocators = null; - @Inject protected EventDao _eventDao = null; - @Inject protected VMInstanceDao _vmDao = null; - @Inject protected VolumeDao _volDao = null; - @Inject protected CapacityDao _capacityDao = null; - @Inject protected ConfigurationDao _configDao = null; - @Inject protected StoragePoolDao _storagePoolDao = null; - @Inject protected StoragePoolHostDao _storagePoolHostDao = null; - @Inject protected GuestOSCategoryDao _guestOSCategoryDao = null; - @Inject protected DetailsDao _hostDetailsDao = null; - @Inject protected ClusterDao _clusterDao; - @Inject protected HostTagsDao _hostTagsDao = null; - - private String 
_publicNic; - private String _privateNic; - private String _guestNic; - private String _storageNic1; - private String _storageNic2; + protected NioServer _connection; + @Inject + protected HostDao _hostDao = null; + @Inject + protected UserStatisticsDao _userStatsDao = null; + @Inject + protected DataCenterDao _dcDao = null; + @Inject + protected VlanDao _vlanDao = null; + @Inject + protected DataCenterIpAddressDaoImpl _privateIPAddressDao = null; + @Inject + protected IPAddressDao _publicIPAddressDao = null; + @Inject + protected HostPodDao _podDao = null; + protected Adapters _hostAllocators = null; + protected Adapters _podAllocators = null; + @Inject + protected EventDao _eventDao = null; + @Inject + protected VMInstanceDao _vmDao = null; + @Inject + protected VolumeDao _volDao = null; + @Inject + protected CapacityDao _capacityDao = null; + @Inject + protected ConfigurationDao _configDao = null; + @Inject + protected StoragePoolDao _storagePoolDao = null; + @Inject + protected StoragePoolHostDao _storagePoolHostDao = null; + @Inject + protected GuestOSCategoryDao _guestOSCategoryDao = null; + @Inject + protected DetailsDao _hostDetailsDao = null; + @Inject + protected ClusterDao _clusterDao; + @Inject + protected HostTagsDao _hostTagsDao = null; - - protected Adapters _discoverers = null; - protected int _port; + private String _publicNic; + private String _privateNic; + private String _guestNic; + private String _storageNic1; + private String _storageNic2; - @Inject - protected HighAvailabilityManager _haMgr = null; - @Inject - protected AlertManager _alertMgr = null; + protected Adapters _discoverers = null; + protected int _port; - @Inject - protected NetworkManager _networkMgr = null; + @Inject + protected HighAvailabilityManager _haMgr = null; + @Inject + protected AlertManager _alertMgr = null; - @Inject - protected UpgradeManager _upgradeMgr = null; + @Inject + protected NetworkManager _networkMgr = null; - @Inject - protected ConfigurationManager 
_configMgr; - - protected int _retry = 2; + @Inject + protected UpgradeManager _upgradeMgr = null; - protected String _name; - protected String _instance; + @Inject + protected ConfigurationManager _configMgr; - protected int _wait; - protected int _updateWait; - protected int _alertWait; - protected long _nodeId = -1; - protected int _overProvisioningFactor = 1; - protected float _cpuOverProvisioningFactor = 1; - - - protected Random _rand = new Random(System.currentTimeMillis()); + protected int _retry = 2; - protected int _pingInterval; - protected long _pingTimeout; - protected AgentMonitor _monitor = null; + protected String _name; + protected String _instance; - protected ExecutorService _executor; + protected int _wait; + protected int _updateWait; + protected int _alertWait; + protected long _nodeId = -1; + protected int _overProvisioningFactor = 1; + protected float _cpuOverProvisioningFactor = 1; - @Inject - protected VMTemplateDao _tmpltDao; - @Inject - protected VMTemplateHostDao _vmTemplateHostDao; - @Override - public boolean configure(final String name, final Map params) throws ConfigurationException { - _name = name; + protected Random _rand = new Random(System.currentTimeMillis()); - Request.initBuilder(); + protected int _pingInterval; + protected long _pingTimeout; + protected AgentMonitor _monitor = null; - final ComponentLocator locator = ComponentLocator.getCurrentLocator(); - ConfigurationDao configDao = locator.getDao(ConfigurationDao.class); - if (configDao == null) { - throw new ConfigurationException("Unable to get the configuration dao."); - } + protected ExecutorService _executor; - final Map configs = configDao.getConfiguration("AgentManager", params); - - _publicNic = configDao.getValue(Config.XenPublicNetwork.key()); - _privateNic = configDao.getValue(Config.XenPrivateNetwork.key()); - _guestNic = configDao.getValue(Config.XenGuestNetwork.key()); - _storageNic1 = configDao.getValue(Config.XenStorageNetwork1.key()); - _storageNic2 = 
configDao.getValue(Config.XenStorageNetwork2.key()); + @Inject + protected VMTemplateDao _tmpltDao; + @Inject + protected VMTemplateHostDao _vmTemplateHostDao; - _port = NumbersUtil.parseInt(configs.get("port"), 8250); - final int workers = NumbersUtil.parseInt(configs.get("workers"), 5); + @Override + public boolean configure(final String name, final Map params) + throws ConfigurationException { + _name = name; - String value = configs.get("ping.interval"); - _pingInterval = NumbersUtil.parseInt(value, 60); + Request.initBuilder(); - value = configs.get("wait"); - _wait = NumbersUtil.parseInt(value, 1800) * 1000; + final ComponentLocator locator = ComponentLocator.getCurrentLocator(); + ConfigurationDao configDao = locator.getDao(ConfigurationDao.class); + if (configDao == null) { + throw new ConfigurationException( + "Unable to get the configuration dao."); + } - value = configs.get("alert.wait"); - _alertWait = NumbersUtil.parseInt(value, 1800); + final Map configs = configDao.getConfiguration( + "AgentManager", params); - value = configs.get("update.wait"); - _updateWait = NumbersUtil.parseInt(value, 600); + _publicNic = configDao.getValue(Config.XenPublicNetwork.key()); + _privateNic = configDao.getValue(Config.XenPrivateNetwork.key()); + _guestNic = configDao.getValue(Config.XenGuestNetwork.key()); + _storageNic1 = configDao.getValue(Config.XenStorageNetwork1.key()); + _storageNic2 = configDao.getValue(Config.XenStorageNetwork2.key()); - value = configs.get("ping.timeout"); - final float multiplier = value != null ? 
Float.parseFloat(value) : 2.5f; - _pingTimeout = (long) (multiplier * _pingInterval); + _port = NumbersUtil.parseInt(configs.get("port"), 8250); + final int workers = NumbersUtil.parseInt(configs.get("workers"), 5); - s_logger.info("Ping Timeout is " + _pingTimeout); + String value = configs.get("ping.interval"); + _pingInterval = NumbersUtil.parseInt(value, 60); - _instance = configs.get("instance.name"); - if (_instance == null) { - _instance = "DEFAULT"; - } + value = configs.get("wait"); + _wait = NumbersUtil.parseInt(value, 1800) * 1000; - _hostAllocators = locator.getAdapters(HostAllocator.class); - if (_hostAllocators == null || !_hostAllocators.isSet()) { - throw new ConfigurationException("Unable to find an host allocator."); - } + value = configs.get("alert.wait"); + _alertWait = NumbersUtil.parseInt(value, 1800); - _podAllocators = locator.getAdapters(PodAllocator.class); - if (_podAllocators == null || !_podAllocators.isSet()) { - throw new ConfigurationException("Unable to find an pod allocator."); - } - - _discoverers = locator.getAdapters(Discoverer.class); + value = configs.get("update.wait"); + _updateWait = NumbersUtil.parseInt(value, 600); - if (_nodeId == -1) { - // FIXME: We really should not do this like this. It should be done - // at config time and is stored as a config variable. - _nodeId = MacAddress.getMacAddress().toLong(); - } + value = configs.get("ping.timeout"); + final float multiplier = value != null ? 
Float.parseFloat(value) : 2.5f; + _pingTimeout = (long) (multiplier * _pingInterval); - _hostDao.markHostsAsDisconnected(_nodeId, Status.Up, Status.Connecting, Status.Updating, Status.Disconnected, Status.Down); - - _monitor = new AgentMonitor(_nodeId, _hostDao, _volDao, _vmDao, _dcDao, _podDao, this, _alertMgr, _pingTimeout); - registerForHostEvents(_monitor, true, true, false); + s_logger.info("Ping Timeout is " + _pingTimeout); - _executor = new ThreadPoolExecutor(10, 100, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue(), new NamedThreadFactory("AgentTaskPool")); + _instance = configs.get("instance.name"); + if (_instance == null) { + _instance = "DEFAULT"; + } - String overProvisioningFactorStr = configs.get("storage.overprovisioning.factor"); - _overProvisioningFactor = NumbersUtil.parseInt(overProvisioningFactorStr, 1); - - String cpuOverProvisioningFactorStr = configs.get("cpu.overprovisioning.factor"); - _cpuOverProvisioningFactor = NumbersUtil.parseFloat(cpuOverProvisioningFactorStr, 1); - if(_cpuOverProvisioningFactor < 1){ - _cpuOverProvisioningFactor = 1; - } + _hostAllocators = locator.getAdapters(HostAllocator.class); + if (_hostAllocators == null || !_hostAllocators.isSet()) { + throw new ConfigurationException( + "Unable to find an host allocator."); + } - _connection = new NioServer("AgentManager", _port, workers + 10, this); - - s_logger.info("Listening on " + _port + " with " + workers + " workers"); + _podAllocators = locator.getAdapters(PodAllocator.class); + if (_podAllocators == null || !_podAllocators.isSet()) { + throw new ConfigurationException("Unable to find an pod allocator."); + } - return true; - } + _discoverers = locator.getAdapters(Discoverer.class); - @Override - public Task create(Task.Type type, Link link, byte[] data) { - return new AgentHandler(type, link, data); - } + if (_nodeId == -1) { + // FIXME: We really should not do this like this. It should be done + // at config time and is stored as a config variable. 
+ _nodeId = MacAddress.getMacAddress().toLong(); + } - @Override - public int registerForHostEvents(final Listener listener, boolean connections, boolean commands, boolean priority) { - synchronized (_hostMonitors) { - _monitorId++; - if (connections) { - if (priority) { - _hostMonitors.add(0, new Pair(_monitorId, listener)); - } else { - _hostMonitors.add(new Pair(_monitorId, listener)); - } - } - if (commands) { - if (priority) { - _cmdMonitors.add(0, new Pair(_monitorId, listener)); - } else { - _cmdMonitors.add(new Pair(_monitorId, listener)); - } - } - if (s_logger.isDebugEnabled()) { - s_logger.debug("Registering listener " + listener.getClass().getSimpleName() + " with id " + _monitorId); - } - return _monitorId; - } - } + _hostDao.markHostsAsDisconnected(_nodeId, Status.Up, Status.Connecting, + Status.Updating, Status.Disconnected, Status.Down); - @Override - public void unregisterForHostEvents(final int id) { - s_logger.debug("Deregistering " + id); - _hostMonitors.remove(id); - } + _monitor = new AgentMonitor(_nodeId, _hostDao, _volDao, _vmDao, _dcDao, + _podDao, this, _alertMgr, _pingTimeout); + registerForHostEvents(_monitor, true, true, false); - private AgentControlAnswer handleControlCommand(AgentAttache attache, final AgentControlCommand cmd) { - AgentControlAnswer answer = null; - - for (Pair listener : _cmdMonitors) { - answer = listener.second().processControlCommand(attache.getId(), cmd); - - if(answer != null) - return answer; - } - - s_logger.warn("No handling of agent control command: " + cmd.toString() + " sent from " + attache.getId()); - return new AgentControlAnswer(cmd); - } - - public void handleCommands(AgentAttache attache, final long sequence, final Command[] cmds) { - for (Pair listener : _cmdMonitors) { - boolean processed = listener.second().processCommand(attache.getId(), sequence, cmds); - if (s_logger.isTraceEnabled()) { - s_logger.trace("SeqA " + attache.getId() + "-" + sequence + ": " + (processed ? 
"processed" : "not processed") + " by " + listener.getClass()); - } - } - } + _executor = new ThreadPoolExecutor(16, 100, 60l, TimeUnit.SECONDS, + new LinkedBlockingQueue(), new NamedThreadFactory( + "AgentTaskPool")); - public AgentAttache findAttache(long hostId) { - return _agents.get(hostId); - } + String overProvisioningFactorStr = configs + .get("storage.overprovisioning.factor"); + _overProvisioningFactor = NumbersUtil.parseInt( + overProvisioningFactorStr, 1); - - @Override - public Set getConnectedHosts() { - // make the returning set be safe for concurrent iteration - final HashSet result = new HashSet(); + String cpuOverProvisioningFactorStr = configs + .get("cpu.overprovisioning.factor"); + _cpuOverProvisioningFactor = NumbersUtil.parseFloat( + cpuOverProvisioningFactorStr, 1); + if (_cpuOverProvisioningFactor < 1) { + _cpuOverProvisioningFactor = 1; + } - synchronized (_agents) { - final Set s = _agents.keySet(); - for (final Long id : s) - result.add(id); - } - return result; - } + _connection = new NioServer("AgentManager", _port, workers + 10, this); - @Override - public Host findHost(final Host.Type type, final DataCenterVO dc, final HostPodVO pod, final StoragePoolVO sp, - final ServiceOffering offering, final VMTemplateVO template, VMInstanceVO vm, - Host currentHost, final Set avoid) { - VmCharacteristics vmc = new VmCharacteristics(vm.getType()); - Enumeration en = _hostAllocators.enumeration(); - while (en.hasMoreElements()) { - final HostAllocator allocator = en.nextElement(); - final Host host = allocator.allocateTo(vmc, offering, type, dc, pod, sp, template, avoid); - if (host == null) { - continue; - } else { - return host; - } - } + s_logger.info("Listening on " + _port + " with " + workers + " workers"); - s_logger.warn("findHost() could not find a non-null host."); - return null; - } - - @Override - public List listByDataCenter(long dcId) { - List pods = _podDao.listByDataCenterId(dcId); - ArrayList pcs = new ArrayList(); - for 
(HostPodVO pod : pods) { - List clusters = _clusterDao.listByPodId(pod.getId()); - if (clusters.size() == 0) { - pcs.add(new PodCluster(pod, null)); - } else { - for (ClusterVO cluster : clusters) { - pcs.add(new PodCluster(pod, cluster)); - } - } - } - return pcs; - } - - @Override - public List listByPod(long podId) { - ArrayList pcs = new ArrayList(); - HostPodVO pod = _podDao.findById(podId); - if (pod == null) { - return pcs; - } - List clusters = _clusterDao.listByPodId(pod.getId()); - if (clusters.size() == 0) { - pcs.add(new PodCluster(pod, null)); - } else { - for (ClusterVO cluster : clusters) { - pcs.add(new PodCluster(pod, cluster)); - } - } - return pcs; - } - - - - protected AgentAttache handleDirectConnect(ServerResource resource, StartupCommand[] startup, Map details, boolean old, List hostTags) { - if (startup == null) { - return null; - } - HostVO server = createHost(startup, resource, details, old, hostTags); - if (server == null) { - return null; - } - - long id = server.getId(); - - AgentAttache attache = createAttache(id, server, resource); - if (attache.isReady()) { - StartupAnswer[] answers = new StartupAnswer[startup.length]; - for (int i = 0; i < answers.length; i++) { - answers[i] = new StartupAnswer(startup[i], attache.getId(), _pingInterval); - } - - attache.process(answers); - } - - attache = notifyMonitorsOfConnection(attache, startup); - return attache; - } - - @Override - public List discoverHosts(long dcId, Long podId, Long clusterId, URI url, String username, String password, String hostTags) throws IllegalArgumentException, DiscoveryException { - List hosts = new ArrayList(); - s_logger.info("Trying to add a new host at " + url + " in data center " + dcId); - - List hostTagList = StringUtils.csvTagsToList(hostTags); - - Enumeration en = _discoverers.enumeration(); - while (en.hasMoreElements()) { - Discoverer discoverer = en.nextElement(); - Map> resources = discoverer.find(dcId, podId, clusterId, url, username, password); - if 
(resources != null) { - for (Map.Entry> entry : resources.entrySet()) { - ServerResource resource = entry.getKey(); - - AgentAttache attache = simulateStart(resource, entry.getValue(), true, hostTagList); - if (attache != null) { - hosts.add(_hostDao.findById(attache.getId())); - } - discoverer.postDiscovery(hosts, _nodeId); - - } - s_logger.info("server resources successfully discovered by " + discoverer.getName()); - return hosts; - } - } - - s_logger.warn("Unable to find the server resources at " + url); - return hosts; - } - - @Override - @DB - public boolean deleteHost(long hostId) { - Transaction txn = Transaction.currentTxn(); - try { - HostVO host = _hostDao.findById(hostId); - if (host == null) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Host: " + hostId + " does not even exist. Delete call is ignored."); - } - return true; - } - if (host.getType() == Type.SecondaryStorage) { - return deleteSecondaryStorageHost(host); - } - if (s_logger.isDebugEnabled()) { - s_logger.debug("Delete Host: " + hostId + " Guid:" + host.getGuid()); - } - - if (host.getType() == Type.Routing && host.getHypervisorType() == Hypervisor.Type.XenServer ) { - if (host.getClusterId() != null) { - List hosts = _hostDao.listBy(Type.Routing, host.getClusterId(), host.getPodId(), host.getDataCenterId()); - hosts.add(host); - boolean success = true; - for( HostVO thost: hosts ) { - long thostId = thost.getId(); - - PoolEjectCommand eject = new PoolEjectCommand(host.getGuid()); - Answer answer = easySend(thostId, eject); - if( answer != null && answer.getResult()) { - s_logger.debug("Eject Host: " + hostId + " from " + thostId + " Succeed"); - success = true; - break; - - } else { - success = false; - s_logger.debug("Eject Host: " + hostId + " from " + thostId + " failed due to " + (answer != null ? 
answer.getDetails() : "no answer")); - } - } - if( !success ){ - String msg = "Unable to eject host " + host.getGuid() + " due to there is no host up in this cluster, please execute xe pool-eject host-uuid=" - + host.getGuid() + "in this host " + host.getPrivateIpAddress(); - s_logger.info(msg); - _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(),"Unable to eject host " + host.getGuid(), msg); - } - } - } - txn.start(); - - _dcDao.releasePrivateIpAddress(host.getPrivateIpAddress(), host.getDataCenterId(), null); - AgentAttache attache = _agents.get(hostId); - handleDisconnect(attache, Status.Event.Remove, false); - - /*Disconnected agent needs special handling here*/ - - //delete host details - _hostDetailsDao.deleteDetails(hostId); - host.setGuid(null); - host.setClusterId(null); - _hostDao.update(host.getId(), host); - _hostDao.remove(hostId); - - //1. Get the pool_ids from the host ref table - ArrayList pool_ids = _storagePoolHostDao.getPoolIds(hostId); - - //2.Delete the associated entries in host ref table - _storagePoolHostDao.deletePrimaryRecordsForHost(hostId); - - //3.For pool ids you got, delete entries in pool table where type='FileSystem' || 'LVM' - for( Long poolId : pool_ids) { - StoragePoolVO storagePool = _storagePoolDao.findById(poolId); - if( storagePool.isLocal()) { - storagePool.setUuid(null); - storagePool.setClusterId(null); - _storagePoolDao.update(poolId, storagePool); - _storagePoolDao.remove(poolId); - } - } - txn.commit(); - return true; - } catch (Throwable t) { - s_logger.error("Unable to delete host: " + hostId, t); - return false; - } - } - - @DB - protected boolean deleteSecondaryStorageHost(HostVO secStorageHost) { - long zoneId = secStorageHost.getDataCenterId(); - long hostId = secStorageHost.getId(); - Transaction txn = Transaction.currentTxn(); - try { - List allVmsInZone = _vmDao.listByZoneId(zoneId); - if (!allVmsInZone.isEmpty()) { - s_logger.warn("Cannot delete secondary storage host 
when there are " + allVmsInZone.size() + " vms in zone " + zoneId); - return false; - } - txn.start(); - - if (!_hostDao.updateStatus(secStorageHost, Event.MaintenanceRequested, _nodeId)) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Unable to take host " + hostId + " into maintenance mode. Delete call is ignored"); - } - return false; - } - if (!_hostDao.updateStatus(secStorageHost, Event.PreparationComplete, _nodeId)) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Unable to take host " + hostId + " into maintenance mode. Delete call is ignored"); - } - return false; - } - - AgentAttache attache = _agents.get(hostId); - handleDisconnect(attache, Status.Event.Remove, false); - _hostDao.remove(secStorageHost.getId()); - /*Disconnected agent needs special handling here*/ - secStorageHost.setGuid(null); - txn.commit(); - return true; - }catch (Throwable t) { - s_logger.error("Unable to delete sec storage host: " + secStorageHost.getId(), t); - return false; - } + return true; } @Override - public boolean isVirtualMachineUpgradable(final UserVm vm, final ServiceOffering offering) { - Enumeration en = _hostAllocators.enumeration(); - boolean isMachineUpgradable = true; - while (isMachineUpgradable && en.hasMoreElements()) { - final HostAllocator allocator = en.nextElement(); - isMachineUpgradable = allocator.isVirtualMachineUpgradable(vm, offering); + public Task create(Task.Type type, Link link, byte[] data) { + return new AgentHandler(type, link, data); + } + + @Override + public int registerForHostEvents(final Listener listener, + boolean connections, boolean commands, boolean priority) { + synchronized (_hostMonitors) { + _monitorId++; + if (connections) { + if (priority) { + _hostMonitors.add(0, new Pair( + _monitorId, listener)); + } else { + _hostMonitors.add(new Pair(_monitorId, + listener)); + } + } + if (commands) { + if (priority) { + _cmdMonitors.add(0, new Pair(_monitorId, + listener)); + } else { + _cmdMonitors.add(new Pair(_monitorId, + 
listener)); + } + } + if (s_logger.isDebugEnabled()) { + s_logger.debug("Registering listener " + + listener.getClass().getSimpleName() + " with id " + + _monitorId); + } + return _monitorId; + } + } + + @Override + public void unregisterForHostEvents(final int id) { + s_logger.debug("Deregistering " + id); + _hostMonitors.remove(id); + } + + private AgentControlAnswer handleControlCommand(AgentAttache attache, + final AgentControlCommand cmd) { + AgentControlAnswer answer = null; + + for (Pair listener : _cmdMonitors) { + answer = listener.second().processControlCommand(attache.getId(), + cmd); + + if (answer != null) + return answer; + } + + s_logger.warn("No handling of agent control command: " + cmd.toString() + + " sent from " + attache.getId()); + return new AgentControlAnswer(cmd); + } + + public void handleCommands(AgentAttache attache, final long sequence, + final Command[] cmds) { + for (Pair listener : _cmdMonitors) { + boolean processed = listener.second().processCommand( + attache.getId(), sequence, cmds); + if (s_logger.isTraceEnabled()) { + s_logger.trace("SeqA " + attache.getId() + "-" + sequence + + ": " + (processed ? 
"processed" : "not processed") + + " by " + listener.getClass()); + } + } + } + + public AgentAttache findAttache(long hostId) { + AgentAttache attache = null; + synchronized (_agents) { + attache = _agents.get(hostId); + } + return attache; + } + + @Override + public Set getConnectedHosts() { + // make the returning set be safe for concurrent iteration + final HashSet result = new HashSet(); + + synchronized (_agents) { + final Set s = _agents.keySet(); + for (final Long id : s) + result.add(id); + } + return result; + } + + @Override + public Host findHost(final Host.Type type, final DataCenterVO dc, + final HostPodVO pod, final StoragePoolVO sp, + final ServiceOffering offering, final VMTemplateVO template, + VMInstanceVO vm, Host currentHost, final Set avoid) { + VmCharacteristics vmc = new VmCharacteristics(vm.getType()); + Enumeration en = _hostAllocators.enumeration(); + while (en.hasMoreElements()) { + final HostAllocator allocator = en.nextElement(); + final Host host = allocator.allocateTo(vmc, offering, type, dc, + pod, sp, template, avoid); + if (host == null) { + continue; + } else { + return host; + } + } + + s_logger.warn("findHost() could not find a non-null host."); + return null; + } + + @Override + public List listByDataCenter(long dcId) { + List pods = _podDao.listByDataCenterId(dcId); + ArrayList pcs = new ArrayList(); + for (HostPodVO pod : pods) { + List clusters = _clusterDao.listByPodId(pod.getId()); + if (clusters.size() == 0) { + pcs.add(new PodCluster(pod, null)); + } else { + for (ClusterVO cluster : clusters) { + pcs.add(new PodCluster(pod, cluster)); + } + } + } + return pcs; + } + + @Override + public List listByPod(long podId) { + ArrayList pcs = new ArrayList(); + HostPodVO pod = _podDao.findById(podId); + if (pod == null) { + return pcs; + } + List clusters = _clusterDao.listByPodId(pod.getId()); + if (clusters.size() == 0) { + pcs.add(new PodCluster(pod, null)); + } else { + for (ClusterVO cluster : clusters) { + pcs.add(new 
PodCluster(pod, cluster)); + } + } + return pcs; + } + + protected AgentAttache handleDirectConnect(ServerResource resource, + StartupCommand[] startup, Map details, boolean old, + List hostTags) { + if (startup == null) { + return null; + } + HostVO server = createHost(startup, resource, details, old, hostTags); + if (server == null) { + return null; + } + + long id = server.getId(); + + AgentAttache attache = createAttache(id, server, resource); + StartupAnswer[] answers = new StartupAnswer[startup.length]; + for (int i = 0; i < answers.length; i++) { + answers[i] = new StartupAnswer(startup[i], attache.getId(), _pingInterval); } - return isMachineUpgradable; - } + attache.process(answers); - protected int getPingInterval() { - return _pingInterval; - } + attache = notifyMonitorsOfConnection(attache, startup); + return attache; + } - @Override - public Answer send(final Long hostId, final Command cmd, final int timeout) throws AgentUnavailableException, OperationTimedoutException { - Answer[] answers = send(hostId, new Command[] { cmd }, true, timeout); - if (answers != null && !(answers[0] instanceof UnsupportedAnswer)) { - return answers[0]; - } - - if (answers != null && (answers[0] instanceof UnsupportedAnswer)) { - s_logger.warn("Unsupported Command: " + answers[0].getDetails()); - return answers[0]; - } + @Override + public List discoverHosts(long dcId, Long podId, Long clusterId, + URI url, String username, String password, String hostTags) + throws IllegalArgumentException, DiscoveryException { + List hosts = new ArrayList(); + s_logger.info("Trying to add a new host at " + url + " in data center " + + dcId); - return null; - } + List hostTagList = StringUtils.csvTagsToList(hostTags); - @Override - public Answer[] send(final Long hostId, final Command[] cmds, final boolean stopOnError, final int timeout) throws AgentUnavailableException, - OperationTimedoutException { - assert hostId != null : "Who's not checking the agent id before sending? ... 
(finger wagging)"; - if (hostId == null) { - throw new AgentUnavailableException(-1); - } + Enumeration en = _discoverers.enumeration(); + while (en.hasMoreElements()) { + Discoverer discoverer = en.nextElement(); + Map> resources = discoverer + .find(dcId, podId, clusterId, url, username, password); + if (resources != null) { + for (Map.Entry> entry : resources + .entrySet()) { + ServerResource resource = entry.getKey(); - assert cmds.length > 0 : "Ask yourself this about a hundred times. Why am I sending zero length commands?"; + AgentAttache attache = simulateStart(null, resource, + entry.getValue(), true, hostTagList); + if (attache != null) { + hosts.add(_hostDao.findById(attache.getId())); + } + discoverer.postDiscovery(hosts, _nodeId); - if (cmds.length == 0) { - return new Answer[0]; - } + } + s_logger.info("server resources successfully discovered by " + + discoverer.getName()); + return hosts; + } + } - final AgentAttache agent = getAttache(hostId); - if (agent == null || agent.isClosed()) { - throw new AgentUnavailableException("agent not logged into this management server", hostId); - } + s_logger.warn("Unable to find the server resources at " + url); + return hosts; + } - long seq = _hostDao.getNextSequence(hostId); - Request req = new Request(seq, hostId, _nodeId, cmds, stopOnError, true); - return agent.send(req, timeout); - } + @Override + @DB + public boolean deleteHost(long hostId) { + Transaction txn = Transaction.currentTxn(); + try { + HostVO host = _hostDao.findById(hostId); + if (host == null) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Host: " + hostId + + " does not even exist. 
Delete call is ignored."); + } + return true; + } + if (host.getType() == Type.SecondaryStorage) { + return deleteSecondaryStorageHost(host); + } + if (s_logger.isDebugEnabled()) { + s_logger.debug("Delete Host: " + hostId + " Guid:" + + host.getGuid()); + } - protected Status investigate(AgentAttache agent) { - Long hostId = agent.getId(); - if (s_logger.isDebugEnabled()) { - s_logger.debug("checking if agent (" + hostId + ") is alive"); - } + if (host.getType() == Type.Routing + && host.getHypervisorType() == Hypervisor.Type.XenServer) { + if (host.getClusterId() != null) { + List hosts = _hostDao.listBy(Type.Routing, + host.getClusterId(), host.getPodId(), + host.getDataCenterId()); + hosts.add(host); + boolean success = true; + for (HostVO thost : hosts) { + long thostId = thost.getId(); - try { - long seq = _hostDao.getNextSequence(hostId); - Request req = new Request(seq, hostId, _nodeId, new Command[] { new CheckHealthCommand() }, true, true); - Answer[] answers = agent.send(req, 50 * 1000); - if (answers != null && answers[0] != null ) { - Status status = answers[0].getResult() ? Status.Up : Status.Down; - if (s_logger.isDebugEnabled()) { - s_logger.debug("agent (" + hostId + ") responded to checkHeathCommand, reporting that agent is " + status); - } - return status; - } - } catch (AgentUnavailableException e) { - s_logger.debug("Agent is unavailable so we move on."); - } catch (OperationTimedoutException e) { - s_logger.debug("Timed Out " + e.getMessage()); - } + PoolEjectCommand eject = new PoolEjectCommand( + host.getGuid()); + Answer answer = easySend(thostId, eject); + if (answer != null && answer.getResult()) { + s_logger.debug("Eject Host: " + hostId + " from " + + thostId + " Succeed"); + success = true; + break; - return _haMgr.investigate(hostId); - } + } else { + success = false; + s_logger.debug("Eject Host: " + + hostId + + " from " + + thostId + + " failed due to " + + (answer != null ? 
answer.getDetails() + : "no answer")); + } + } + if (!success) { + String msg = "Unable to eject host " + + host.getGuid() + + " due to there is no host up in this cluster, please execute xe pool-eject host-uuid=" + + host.getGuid() + "in this host " + + host.getPrivateIpAddress(); + s_logger.info(msg); + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, + host.getDataCenterId(), host.getPodId(), + "Unable to eject host " + host.getGuid(), msg); + } + } + } + txn.start(); - protected AgentAttache getAttache(final Long hostId) throws AgentUnavailableException { - assert (hostId != null) : "Who didn't check their id value?"; - if (hostId == null) { - return null; - } + _dcDao.releasePrivateIpAddress(host.getPrivateIpAddress(), + host.getDataCenterId(), null); + AgentAttache attache = _agents.get(hostId); + handleDisconnect(attache, Status.Event.Remove, false); - AgentAttache agent = findAttache(hostId); - if (agent == null) { - s_logger.debug("Unable to find agent for " + hostId); - throw new AgentUnavailableException("Unable to find agent ", hostId); - } + /* Disconnected agent needs special handling here */ - return agent; - } + // delete host details + _hostDetailsDao.deleteDetails(hostId); + host.setGuid(null); + host.setClusterId(null); + _hostDao.update(host.getId(), host); + _hostDao.remove(hostId); - @Override - public long send(final Long hostId, final Command[] cmds, final boolean stopOnError, final Listener listener) throws AgentUnavailableException { - final AgentAttache agent = getAttache(hostId); - if (agent.isClosed()) { - return -1; - } + // 1. 
Get the pool_ids from the host ref table + ArrayList pool_ids = _storagePoolHostDao.getPoolIds(hostId); - assert cmds.length > 0 : "Why are you sending zero length commands?"; - if (cmds.length == 0) { - return -1; - } - long seq = _hostDao.getNextSequence(hostId); - Request req = new Request(seq, hostId, _nodeId, cmds, stopOnError, true); - agent.send(req, listener); - return seq; - } + // 2.Delete the associated entries in host ref table + _storagePoolHostDao.deletePrimaryRecordsForHost(hostId); - @Override - public long gatherStats(final Long hostId, final Command cmd, final Listener listener) { - final Command[] cmds = new Command[] { cmd }; - try { - return send(hostId, cmds, true, listener); - } catch (final AgentUnavailableException e) { - return -1; - } - } - - public void removeAgent(AgentAttache attache, Status nextState){ - if( attache == null ) { - return; - } - long hostId = attache.getId(); - if (s_logger.isDebugEnabled()) { - s_logger.debug("remove Agent : " + hostId); - } - AgentAttache removed = null; - boolean conflict = false; - synchronized (_agents) { - removed = _agents.remove(hostId); - if( removed != null && removed != attache ){ - conflict = true; - _agents.put(hostId, removed); - removed = attache; + // 3.For pool ids you got, delete entries in pool table where + // type='FileSystem' || 'LVM' + for (Long poolId : pool_ids) { + StoragePoolVO storagePool = _storagePoolDao.findById(poolId); + if (storagePool.isLocal()) { + storagePool.setUuid(null); + storagePool.setClusterId(null); + _storagePoolDao.update(poolId, storagePool); + _storagePoolDao.remove(poolId); + } + } + txn.commit(); + return true; + } catch (Throwable t) { + s_logger.error("Unable to delete host: " + hostId, t); + return false; + } + } + + @DB + protected boolean deleteSecondaryStorageHost(HostVO secStorageHost) { + long zoneId = secStorageHost.getDataCenterId(); + long hostId = secStorageHost.getId(); + Transaction txn = Transaction.currentTxn(); + try { + List 
allVmsInZone = _vmDao.listByZoneId(zoneId); + if (!allVmsInZone.isEmpty()) { + s_logger.warn("Cannot delete secondary storage host when there are " + + allVmsInZone.size() + " vms in zone " + zoneId); + return false; + } + txn.start(); + + if (!_hostDao.updateStatus(secStorageHost, + Event.MaintenanceRequested, _nodeId)) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Unable to take host " + hostId + + " into maintenance mode. Delete call is ignored"); + } + return false; + } + if (!_hostDao.updateStatus(secStorageHost, + Event.PreparationComplete, _nodeId)) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Unable to take host " + hostId + + " into maintenance mode. Delete call is ignored"); + } + return false; + } + + AgentAttache attache = _agents.get(hostId); + handleDisconnect(attache, Status.Event.Remove, false); + _hostDao.remove(secStorageHost.getId()); + + // delete the templates associated with this host + SearchCriteria templateHostSC = _vmTemplateHostDao + .createSearchCriteria(); + templateHostSC.addAnd("hostId", SearchCriteria.Op.EQ, + secStorageHost.getId()); + _vmTemplateHostDao.remove(templateHostSC); + + // delete the op_host_capacity entry + SearchCriteria secStorageCapacitySC = _capacityDao + .createSearchCriteria(); + secStorageCapacitySC.addAnd("hostOrPoolId", SearchCriteria.Op.EQ, + secStorageHost.getId()); + secStorageCapacitySC.addAnd("capacityType", SearchCriteria.Op.EQ, + CapacityVO.CAPACITY_TYPE_SECONDARY_STORAGE); + _capacityDao.remove(secStorageCapacitySC); + + /* Disconnected agent needs special handling here */ + secStorageHost.setGuid(null); + txn.commit(); + return true; + } catch (Throwable t) { + s_logger.error("Unable to delete sec storage host: " + + secStorageHost.getId(), t); + return false; + } + } + + @Override + public boolean isVirtualMachineUpgradable(final UserVm vm, + final ServiceOffering offering) { + Enumeration en = _hostAllocators.enumeration(); + boolean isMachineUpgradable = true; + while 
(isMachineUpgradable && en.hasMoreElements()) { + final HostAllocator allocator = en.nextElement(); + isMachineUpgradable = allocator.isVirtualMachineUpgradable(vm, + offering); + } + + return isMachineUpgradable; + } + + protected int getPingInterval() { + return _pingInterval; + } + + @Override + public Answer send(final Long hostId, final Command cmd, final int timeout) + throws AgentUnavailableException, OperationTimedoutException { + Answer[] answers = send(hostId, new Command[] { cmd }, true, timeout); + if (answers != null && !(answers[0] instanceof UnsupportedAnswer)) { + return answers[0]; + } + + if (answers != null && (answers[0] instanceof UnsupportedAnswer)) { + s_logger.warn("Unsupported Command: " + answers[0].getDetails()); + return answers[0]; + } + + return null; + } + + @Override + public Answer[] send(final Long hostId, final Command[] cmds, + final boolean stopOnError, final int timeout) + throws AgentUnavailableException, OperationTimedoutException { + assert hostId != null : "Who's not checking the agent id before sending? ... (finger wagging)"; + if (hostId == null) { + throw new AgentUnavailableException(-1); + } + + assert cmds.length > 0 : "Ask yourself this about a hundred times. 
Why am I sending zero length commands?"; + + if (cmds.length == 0) { + return new Answer[0]; + } + + final AgentAttache agent = getAttache(hostId); + if (agent == null || agent.isClosed()) { + throw new AgentUnavailableException( + "agent not logged into this management server", hostId); + } + + long seq = _hostDao.getNextSequence(hostId); + Request req = new Request(seq, hostId, _nodeId, cmds, stopOnError, true); + return agent.send(req, timeout); + } + + protected Status investigate(AgentAttache agent) { + Long hostId = agent.getId(); + if (s_logger.isDebugEnabled()) { + s_logger.debug("checking if agent (" + hostId + ") is alive"); + } + + try { + long seq = _hostDao.getNextSequence(hostId); + Request req = new Request(seq, hostId, _nodeId, + new Command[] { new CheckHealthCommand() }, true, true); + Answer[] answers = agent.send(req, 50 * 1000); + if (answers != null && answers[0] != null) { + Status status = answers[0].getResult() ? Status.Up + : Status.Down; + if (s_logger.isDebugEnabled()) { + s_logger.debug("agent (" + + hostId + + ") responded to checkHeathCommand, reporting that agent is " + + status); + } + return status; + } + } catch (AgentUnavailableException e) { + s_logger.debug("Agent is unavailable so we move on."); + } catch (OperationTimedoutException e) { + s_logger.debug("Timed Out " + e.getMessage()); + } + + return _haMgr.investigate(hostId); + } + + protected AgentAttache getAttache(final Long hostId) + throws AgentUnavailableException { + assert (hostId != null) : "Who didn't check their id value?"; + if (hostId == null) { + return null; + } + + AgentAttache agent = findAttache(hostId); + if (agent == null) { + s_logger.debug("Unable to find agent for " + hostId); + throw new AgentUnavailableException("Unable to find agent ", hostId); + } + + return agent; + } + + @Override + public long send(final Long hostId, final Command[] cmds, + final boolean stopOnError, final Listener listener) + throws AgentUnavailableException { + final 
AgentAttache agent = getAttache(hostId); + if (agent.isClosed()) { + return -1; + } + + assert cmds.length > 0 : "Why are you sending zero length commands?"; + if (cmds.length == 0) { + return -1; + } + long seq = _hostDao.getNextSequence(hostId); + Request req = new Request(seq, hostId, _nodeId, cmds, stopOnError, true); + agent.send(req, listener); + return seq; + } + + @Override + public long gatherStats(final Long hostId, final Command cmd, + final Listener listener) { + final Command[] cmds = new Command[] { cmd }; + try { + return send(hostId, cmds, true, listener); + } catch (final AgentUnavailableException e) { + return -1; + } + } + + public void removeAgent(AgentAttache attache, Status nextState) { + if (attache == null) { + return; + } + long hostId = attache.getId(); + if (s_logger.isDebugEnabled()) { + s_logger.debug("remove Agent : " + hostId); + } + AgentAttache removed = null; + boolean conflict = false; + synchronized (_agents) { + removed = _agents.remove(hostId); + if (removed != null && removed != attache) { + conflict = true; + _agents.put(hostId, removed); + removed = attache; + } + } + if (conflict) { + s_logger.debug("Agent for host " + hostId + + " is created when it is being disconnected"); + } + if (removed != null) { + removed.disconnect(nextState); + } + } + + @Override + public void disconnect(final long hostId, final Status.Event event, + final boolean investigate) { + AgentAttache attache = _agents.get(hostId); + + if (attache != null) { + disconnect(attache, event, investigate); + } else { + HostVO host = _hostDao.findById(hostId); + if (host != null && host.getRemoved() == null) { + _hostDao.updateStatus(host, event, _nodeId); + } + } + } + + public void disconnect(AgentAttache attache, final Status.Event event, + final boolean investigate) { + _executor.submit(new DisconnectTask(attache, event, investigate)); + } + + protected boolean handleDisconnect(AgentAttache attache, + Status.Event event, boolean investigate) { + if (attache 
== null) + return true; + + long hostId = attache.getId(); + + s_logger.info("Host " + hostId + " is disconnecting with event " + + event.toString()); + + HostVO host = _hostDao.findById(hostId); + if (host == null) { + s_logger.warn("Can't find host with " + hostId); + removeAgent(attache, Status.Removed); + return true; + } + + final Status currentState = host.getStatus(); + + if (currentState == Status.PrepareForMaintenance) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Host " + hostId + " is already " + + currentState.toString()); + } + return false; + } + + if (currentState == Status.Down || currentState == Status.Alert + || currentState == Status.Removed) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Host " + hostId + " is already " + + currentState.toString()); + } + removeAgent(attache, currentState); + return true; + } + + Status nextState = currentState.getNextStatus(event); + if (nextState == null) { + if (!(attache instanceof DirectAgentAttache)) { + return false; + } + + s_logger.debug("There is no transition from state " + + currentState.toString() + " and event " + + event.toString()); + assert false : "How did we get here. Look at the FSM"; + return false; + } + + if (s_logger.isDebugEnabled()) { + s_logger.debug("The next state is " + nextState.toString() + + ", current state is " + currentState); + } + + // Now we go and correctly diagnose what the actual situation is + if (nextState == Status.Alert && investigate) { + s_logger.info("Investigating why host " + hostId + + " has disconnected with event " + event.toString()); + + final Status determinedState = investigate(attache); + s_logger.info("The state determined is " + + (determinedState != null ? determinedState.toString() + : "undeterminable")); + + if (determinedState == null || determinedState == Status.Down) { + s_logger.error("Host is down: " + host.getId() + "-" + + host.getName() + ". 
Starting HA on the VMs"); + + event = Event.HostDown; + } else if (determinedState == Status.Up) { + // we effectively pinged from the server here. + s_logger.info("Agent is determined to be up and running"); + _hostDao.updateStatus(host, Event.Ping, _nodeId); + return false; + } else if (determinedState == Status.Disconnected) { + s_logger.warn("Agent is disconnected but the host is still up: " + + host.getId() + "-" + host.getName()); + if (currentState == Status.Disconnected) { + if (((System.currentTimeMillis() >> 10) - host + .getLastPinged()) > _alertWait) { + s_logger.warn("Host " + + host.getId() + + " has been disconnected pass the time it should be disconnected."); + event = Event.WaitedTooLong; + } else { + s_logger.debug("Host has been determined to be disconnected but it hasn't passed the wait time yet."); + return false; + } + } else if (currentState == Status.Updating) { + if (((System.currentTimeMillis() >> 10) - host + .getLastPinged()) > _updateWait) { + s_logger.warn("Host " + host.getId() + + " has been updating for too long"); + + event = Event.WaitedTooLong; + } else { + s_logger.debug("Host has been determined to be disconnected but it hasn't passed the wait time yet."); + return false; + } + } else if (currentState == Status.Up) { + DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); + HostPodVO podVO = _podDao.findById(host.getPodId()); + String hostDesc = "name: " + host.getName() + " (id:" + + host.getId() + "), availability zone: " + + dcVO.getName() + ", pod: " + podVO.getName(); + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, + host.getDataCenterId(), host.getPodId(), + "Host disconnected, " + hostDesc, + "If the agent for host [" + hostDesc + + "] is not restarted within " + _alertWait + + " seconds, HA will begin on the VMs"); + event = Event.AgentDisconnected; + } + } else { + // if we end up here we are in alert state, send an alert + DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); + HostPodVO podVO = 
_podDao.findById(host.getPodId()); + String hostDesc = "name: " + host.getName() + " (id:" + + host.getId() + "), availability zone: " + + dcVO.getName() + ", pod: " + podVO.getName(); + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, + host.getDataCenterId(), host.getPodId(), + "Host in ALERT state, " + hostDesc, + "In availability zone " + host.getDataCenterId() + + ", host is in alert state: " + host.getId() + + "-" + host.getName()); + } + } + + if (s_logger.isDebugEnabled()) { + s_logger.debug("Deregistering link for " + hostId + " with state " + + nextState); + } + + removeAgent(attache, nextState); + _hostDao.disconnect(host, event, _nodeId); + + host = _hostDao.findById(host.getId()); + if (host.getStatus() == Status.Alert || host.getStatus() == Status.Down) { + _haMgr.scheduleRestartForVmsOnHost(host); + } + + for (Pair monitor : _hostMonitors) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Sending Disconnect to listener: " + + monitor.second().getClass().getName()); + } + monitor.second().processDisconnect(hostId, nextState); + } + + return true; + } + + protected AgentAttache notifyMonitorsOfConnection(AgentAttache attache, + final StartupCommand[] cmd) { + long hostId = attache.getId(); + HostVO host = _hostDao.findById(hostId); + for (Pair monitor : _hostMonitors) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Sending Connect to listener: " + + monitor.second().getClass().getSimpleName()); + } + for (int i = 0; i < cmd.length; i++) { + if (!monitor.second().processConnect(host, cmd[i])) { + s_logger.info("Monitor " + + monitor.second().getClass().getSimpleName() + + " says not to continue the connect process for " + + hostId); + handleDisconnect(attache, Event.AgentDisconnected, false); + return attache; + } + } + } + + Long dcId = host.getDataCenterId(); + ReadyCommand ready = new ReadyCommand(dcId); + Answer answer = easySend(hostId, ready); + if (answer == null || !answer.getResult()) { + // this is tricky part for secondary 
storage + // make it as disconnected, wait for secondary storage VM to be up + // return the attache instead of null, even it is disconnectede + handleDisconnect(attache, Event.AgentDisconnected, false); + } + + _hostDao.updateStatus(host, Event.Ready, _nodeId); + attache.ready(); + return attache; + } + + @Override + public boolean start() { + startDirectlyConnectedHosts(); + if (_monitor != null) { + _monitor.start(); + } + _connection.start(); + + return true; + } + + public void startDirectlyConnectedHosts() { + List hosts = _hostDao.findDirectlyConnectedHosts(); + for (HostVO host : hosts) { + loadDirectlyConnectedHost(host); + } + } + + protected void loadDirectlyConnectedHost(HostVO host) { + String resourceName = host.getResource(); + ServerResource resource = null; + try { + Class clazz = Class.forName(resourceName); + Constructor constructor = clazz.getConstructor(); + resource = (ServerResource) constructor.newInstance(); + } catch (ClassNotFoundException e) { + s_logger.warn("Unable to find class " + host.getResource(), e); + return; + } catch (InstantiationException e) { + s_logger.warn("Unablet to instantiate class " + host.getResource(), + e); + return; + } catch (IllegalAccessException e) { + s_logger.warn("Illegal access " + host.getResource(), e); + return; + } catch (SecurityException e) { + s_logger.warn("Security error on " + host.getResource(), e); + return; + } catch (NoSuchMethodException e) { + s_logger.warn( + "NoSuchMethodException error on " + host.getResource(), e); + return; + } catch (IllegalArgumentException e) { + s_logger.warn( + "IllegalArgumentException error on " + host.getResource(), + e); + return; + } catch (InvocationTargetException e) { + s_logger.warn( + "InvocationTargetException error on " + host.getResource(), + e); + return; + } + + _hostDao.loadDetails(host); + + HashMap params = new HashMap(host + .getDetails().size() + 5); + params.putAll(host.getDetails()); + // private.network.device may change when reconnect + 
params.remove("private.network.device"); + params.put("private.network.device", _privateNic); + params.remove("public.network.device"); + params.put("public.network.device", _publicNic); + params.remove("guest.network.device"); + params.put("guest.network.device", _guestNic); + + params.put("guid", host.getGuid()); + params.put("zone", Long.toString(host.getDataCenterId())); + if (host.getPodId() != null) { + params.put("pod", Long.toString(host.getPodId())); + } + if (host.getClusterId() != null) { + params.put("cluster", Long.toString(host.getClusterId())); + } + params.put("secondary.storage.vm", "false"); + params.put("max.template.iso.size", + _configDao.getValue("max.template.iso.size")); + + try { + resource.configure(host.getName(), params); + } catch (ConfigurationException e) { + s_logger.warn("Unable to configure resource due to ", e); + return; + } + + if (!resource.start()) { + s_logger.warn("Unable to start the resource"); + return; + } + host.setLastPinged(System.currentTimeMillis() >> 10); + host.setManagementServerId(_nodeId); + _hostDao.update(host.getId(), host); + _executor.execute(new SimulateStartTask(host.getId(), resource, host + .getDetails(), null)); + } + + protected AgentAttache simulateStart(Long id, ServerResource resource, + Map details, boolean old, List hostTags) + throws IllegalArgumentException { + HostVO host = null; + if (id != null) { + host = _hostDao.findById(id); + if (host.getManagementServerId() != null) { + s_logger.info("MS " + host.getManagementServerId() + " is loading " + host); + return null; } } - if( conflict ) { - s_logger.debug("Ageent for host " + hostId + " is created when it is being disconnected"); - } - if( removed != null ) { - removed.disconnect(nextState); - } - } - - @Override - public void disconnect(final long hostId, final Status.Event event, final boolean investigate) { - AgentAttache attache = _agents.get(hostId); - - if (attache != null ) { - disconnect(attache, event, investigate); - } else { - 
HostVO host = _hostDao.findById(hostId); - if (host != null && host.getRemoved() == null) { - _hostDao.updateStatus(host, event, _nodeId); - } - } - } - - public void disconnect(AgentAttache attache, final Status.Event event, final boolean investigate) { - _executor.submit(new DisconnectTask(attache, event, investigate)); - } - - protected boolean handleDisconnect(AgentAttache attache, Status.Event event, boolean investigate) { - if( attache == null ) - return true; - - long hostId = attache.getId(); - - s_logger.info("Host " + hostId + " is disconnecting with event " + event.toString()); - - HostVO host = _hostDao.findById(hostId); - if (host == null) { - s_logger.warn("Can't find host with " + hostId); - removeAgent(attache, Status.Removed); - return true; - } - - final Status currentState = host.getStatus(); - - if (currentState == Status.PrepareForMaintenance) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Host " + hostId + " is already " + currentState.toString()); - } - return false; - } - - if (currentState == Status.Down || currentState == Status.Alert || currentState == Status.Removed ) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Host " + hostId + " is already " + currentState.toString()); - } - removeAgent(attache, currentState); - return true; - } - - Status nextState = currentState.getNextStatus(event); - if (nextState == null) { - if(!(attache instanceof DirectAgentAttache)) { - return false; - } - - s_logger.debug("There is no transition from state " + currentState.toString() + " and event " + event.toString()); - assert false : "How did we get here. 
Look at the FSM"; - return false; - } - - if (s_logger.isDebugEnabled()) { - s_logger.debug("The next state is " + nextState.toString() + ", current state is " + currentState); - } - - // Now we go and correctly diagnose what the actual situation is - if (nextState == Status.Alert && investigate) { - s_logger.info("Investigating why host " + hostId + " has disconnected with event " + event.toString()); - - final Status determinedState = investigate(attache); - s_logger.info("The state determined is " + (determinedState != null ? determinedState.toString() : "undeterminable")); - - if (determinedState == null || determinedState == Status.Down) { - s_logger.error("Host is down: " + host.getId() + "-" + host.getName() + ". Starting HA on the VMs"); - - event = Event.HostDown; - } else if (determinedState == Status.Up) { - // we effectively pinged from the server here. - s_logger.info("Agent is determined to be up and running"); - _hostDao.updateStatus(host, Event.Ping, _nodeId); - return false; - } else if (determinedState == Status.Disconnected) { - s_logger.warn("Agent is disconnected but the host is still up: " + host.getId() + "-" + host.getName()); - if (currentState == Status.Disconnected) { - if (((System.currentTimeMillis() >> 10) - host.getLastPinged()) > _alertWait) { - s_logger.warn("Host " + host.getId() + " has been disconnected pass the time it should be disconnected."); - event = Event.WaitedTooLong; - } else { - s_logger.debug("Host has been determined to be disconnected but it hasn't passed the wait time yet."); - return false; - } - } else if (currentState == Status.Updating) { - if (((System.currentTimeMillis() >> 10) - host.getLastPinged()) > _updateWait) { - s_logger.warn("Host " + host.getId() + " has been updating for too long"); - - event = Event.WaitedTooLong; - } else { - s_logger.debug("Host has been determined to be disconnected but it hasn't passed the wait time yet."); - return false; - } - } else if (currentState == Status.Up) { - 
DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); - HostPodVO podVO = _podDao.findById(host.getPodId()); - String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " - + podVO.getName(); - _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host disconnected, " + hostDesc, - "If the agent for host [" + hostDesc + "] is not restarted within " + _alertWait + " seconds, HA will begin on the VMs"); - event = Event.AgentDisconnected; - } - } else { - // if we end up here we are in alert state, send an alert - DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); - HostPodVO podVO = _podDao.findById(host.getPodId()); - String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); - _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host in ALERT state, " + hostDesc, - "In availability zone " + host.getDataCenterId() + ", host is in alert state: " + host.getId() + "-" + host.getName()); - } - } - - if (s_logger.isDebugEnabled()) { - s_logger.debug("Deregistering link for " + hostId + " with state " + nextState); - } - - _hostDao.disconnect(host, event, _nodeId); - - synchronized (_agents) { - AgentAttache removed = _agents.remove(hostId); - } - host = _hostDao.findById(host.getId()); - if (host.getStatus() == Status.Alert || host.getStatus() == Status.Down) { - _haMgr.scheduleRestartForVmsOnHost(host); - } - attache.disconnect(nextState); - - for (Pair monitor : _hostMonitors) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Sending Disconnect to listener: " + monitor.second().getClass().getName()); - } - monitor.second().processDisconnect(hostId, nextState); - } - - return true; - } - - protected AgentAttache notifyMonitorsOfConnection(AgentAttache attache, final StartupCommand[] cmd) { - long hostId = attache.getId(); - 
HostVO host = _hostDao.findById(hostId); - for (Pair monitor : _hostMonitors) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Sending Connect to listener: " + monitor.second().getClass().getSimpleName()); - } - for (int i = 0; i < cmd.length; i++) { - if (!monitor.second().processConnect(host, cmd[i])) { - s_logger.info("Monitor " + monitor.second().getClass().getSimpleName() + " says not to continue the connect process for " + hostId); - handleDisconnect(attache, Event.AgentDisconnected, false); - return attache; - } - } - } - - Long dcId = host.getDataCenterId(); - ReadyCommand ready = new ReadyCommand(dcId); - Answer answer = easySend(hostId, ready); - if (answer == null || !answer.getResult()) { - // this is tricky part for secondary storage - // make it as disconnected, wait for secondary storage VM to be up - // return the attache instead of null, even it is disconnectede - handleDisconnect(attache, Event.AgentDisconnected, false); - } - - _hostDao.updateStatus(host, Event.Ready, _nodeId); - attache.ready(); - return attache; - } - - @Override - public boolean start() { - startDirectlyConnectedHosts(); - if (_monitor != null) { - _monitor.start(); - } - _connection.start(); - - return true; - } - - public void startDirectlyConnectedHosts() { - List hosts = _hostDao.findDirectlyConnectedHosts(); - for (HostVO host : hosts) { - loadDirectlyConnectedHost(host); - } - } - - protected void loadDirectlyConnectedHost(HostVO host) { - String resourceName = host.getResource(); - ServerResource resource = null; - try { - Class clazz = Class.forName(resourceName); - Constructor constructor = clazz.getConstructor(); - resource = (ServerResource) constructor.newInstance(); - } catch (ClassNotFoundException e) { - s_logger.warn("Unable to find class " + host.getResource(), e); - return; - } catch (InstantiationException e) { - s_logger.warn("Unablet to instantiate class " + host.getResource(), e); - return; - } catch (IllegalAccessException e) { - 
s_logger.warn("Illegal access " + host.getResource(), e); - return; - } catch (SecurityException e) { - s_logger.warn("Security error on " + host.getResource(), e); - return; - } catch (NoSuchMethodException e) { - s_logger.warn("NoSuchMethodException error on " + host.getResource(), e); - return; - } catch (IllegalArgumentException e) { - s_logger.warn("IllegalArgumentException error on " + host.getResource(), e); - return; - } catch (InvocationTargetException e) { - s_logger.warn("InvocationTargetException error on " + host.getResource(), e); - return; - } - - _hostDao.loadDetails(host); - - HashMap params = new HashMap(host.getDetails().size() + 5); - params.putAll(host.getDetails()); - // private.network.device may change when reconnect - params.remove("private.network.device"); - params.put("private.network.device", _privateNic); - params.remove("public.network.device"); - params.put("public.network.device", _publicNic); - params.remove("guest.network.device"); - params.put("guest.network.device", _guestNic); - - - params.put("guid", host.getGuid()); - params.put("zone", Long.toString(host.getDataCenterId())); - if (host.getPodId() != null) { - params.put("pod", Long.toString(host.getPodId())); - } - if (host.getClusterId() != null) { - params.put("cluster", Long.toString(host.getClusterId())); - } - params.put("secondary.storage.vm", "false"); - params.put("max.template.iso.size", _configDao.getValue("max.template.iso.size")); - - try { - resource.configure(host.getName(), params); - } catch (ConfigurationException e) { - s_logger.warn("Unable to configure resource due to ", e); - return; - } - - if (!resource.start()) { - s_logger.warn("Unable to start the resource"); - return; - } - host.setLastPinged(System.currentTimeMillis() >> 10); - host.setManagementServerId(_nodeId); - _hostDao.update(host.getId(), host); - _executor.execute(new SimulateStartTask(host.getId(), resource, host.getDetails(), null)); - } - - protected AgentAttache 
simulateStart(ServerResource resource, Map details, boolean old, List hostTags) throws IllegalArgumentException{ StartupCommand[] cmds = resource.initialize(); - if (cmds == null ) - return null; - - AgentAttache attache = null; - if (s_logger.isDebugEnabled()) { - s_logger.debug("Startup request from directly connected host: " + new Request(0, -1, -1, cmds, false).toString()); - } - try { - attache = handleDirectConnect(resource, cmds, details, old, hostTags); - }catch (IllegalArgumentException ex) - { - s_logger.warn("Unable to connect due to ", ex); - throw ex; - } - catch (Exception e) { - s_logger.warn("Unable to connect due to ", e); - } + if (cmds == null) + return null; + if (host != null) { + if (!_hostDao.directConnect(host, _nodeId)) { + s_logger.info("Someone else is loading " + host); + resource.disconnected(); + return null; + } + } + AgentAttache attache = null; + if (s_logger.isDebugEnabled()) { + s_logger.debug("Startup request from directly connected host: " + + new Request(0, -1, -1, cmds, false).toString()); + } + try { + attache = handleDirectConnect(resource, cmds, details, old, + hostTags); + } catch (IllegalArgumentException ex) { + s_logger.warn("Unable to connect due to ", ex); + throw ex; + } catch (Exception e) { + s_logger.warn("Unable to connect due to ", e); + } - if (attache == null) { - resource.disconnected(); - return null; - } - return attache; - } + if (attache == null) { + resource.disconnected(); + return null; + } + return attache; + } - @Override - public boolean stop() { - if (_monitor != null) { - _monitor.signalStop(); - } - if (_connection != null) { - _connection.stop(); - } + @Override + public boolean stop() { + if (_monitor != null) { + _monitor.signalStop(); + } + if (_connection != null) { + _connection.stop(); + } - s_logger.info("Disconnecting agents: " + _agents.size()); - synchronized (_agents) { - for (final AgentAttache agent : _agents.values()) { - final HostVO host = _hostDao.findById(agent.getId()); - if( 
host == null ) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Cant not find host " + agent.getId()); - } - } else { - _hostDao.updateStatus(host, Event.ManagementServerDown, _nodeId); - } - } - } - return true; - } + s_logger.info("Disconnecting agents: " + _agents.size()); + synchronized (_agents) { + for (final AgentAttache agent : _agents.values()) { + final HostVO host = _hostDao.findById(agent.getId()); + if (host == null) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Cant not find host " + agent.getId()); + } + } else { + _hostDao.updateStatus(host, Event.ManagementServerDown, + _nodeId); + } + } + } + return true; + } - @Override - public Pair findPod(final VirtualMachineTemplate template, ServiceOfferingVO offering, final DataCenterVO dc, final long accountId, Set avoids) { - final Enumeration en = _podAllocators.enumeration(); - while (en.hasMoreElements()) { - final PodAllocator allocator = (PodAllocator) en.nextElement(); - final Pair pod = allocator.allocateTo(template, offering, dc, accountId, avoids); - if (pod != null) { - return pod; - } - } - return null; - } + @Override + public Pair findPod(final VirtualMachineTemplate template, + ServiceOfferingVO offering, final DataCenterVO dc, + final long accountId, Set avoids) { + final Enumeration en = _podAllocators.enumeration(); + while (en.hasMoreElements()) { + final PodAllocator allocator = (PodAllocator) en.nextElement(); + final Pair pod = allocator.allocateTo(template, + offering, dc, accountId, avoids); + if (pod != null) { + return pod; + } + } + return null; + } - @Override - public HostStats getHostStatistics(long hostId) throws InternalErrorException - { - Answer answer = easySend(hostId, new GetHostStatsCommand(_hostDao.findById(hostId).getGuid(), _hostDao.findById(hostId).getName(),hostId)); - - if (answer != null && (answer instanceof UnsupportedAnswer)) { - return null; - } - - if (answer == null || !answer.getResult()) { - String msg = "Unable to obtain host " + hostId + 
" statistics. "; - s_logger.warn(msg); - return null; - } else { - + @Override + public HostStats getHostStatistics(long hostId) + throws InternalErrorException { + Answer answer = easySend(hostId, new GetHostStatsCommand(_hostDao + .findById(hostId).getGuid(), _hostDao.findById(hostId) + .getName(), hostId)); - //now construct the result object - if(answer instanceof GetHostStatsAnswer) - { - return ((GetHostStatsAnswer) answer).getHostStats(); - } - } - return null; - } - - @Override - public Long getGuestOSCategoryId(long hostId) { - HostVO host = _hostDao.findById(hostId); - if (host == null) { - return null; - } else { - _hostDao.loadDetails(host); - DetailVO detail = _hostDetailsDao.findDetail(hostId, "guest.os.category.id"); - if (detail == null) { - return null; - } else { - return Long.parseLong(detail.getValue()); - } - } - } - - @Override - public String getHostTags(long hostId){ + if (answer != null && (answer instanceof UnsupportedAnswer)) { + return null; + } + + if (answer == null || !answer.getResult()) { + String msg = "Unable to obtain host " + hostId + " statistics. 
"; + s_logger.warn(msg); + return null; + } else { + + // now construct the result object + if (answer instanceof GetHostStatsAnswer) { + return ((GetHostStatsAnswer) answer).getHostStats(); + } + } + return null; + } + + @Override + public Long getGuestOSCategoryId(long hostId) { + HostVO host = _hostDao.findById(hostId); + if (host == null) { + return null; + } else { + _hostDao.loadDetails(host); + DetailVO detail = _hostDetailsDao.findDetail(hostId, + "guest.os.category.id"); + if (detail == null) { + return null; + } else { + return Long.parseLong(detail.getValue()); + } + } + } + + @Override + public String getHostTags(long hostId) { List hostTags = _hostTagsDao.gethostTags(hostId); if (hostTags == null) { return null; } else { return StringUtils.listToCsvTags(hostTags); } - } - - @Override - public String getName() { - return _name; - } - - protected class DisconnectTask implements Runnable { - AgentAttache _attache; - Status.Event _event; - boolean _investigate; - - DisconnectTask(final AgentAttache attache, final Status.Event event, final boolean investigate) { - _attache = attache; - _event = event; - _investigate = investigate; - } - - @Override - public void run() { - try { - handleDisconnect(_attache, _event, _investigate); - } catch (final Exception e) { - s_logger.error("Exception caught while handling disconnect: ", e); - } finally { - StackMaid.current().exitCleanup(); - } - } - } - - @Override - public Answer easySend(final Long hostId, final Command cmd) { - return easySend(hostId, cmd, _wait); - } - - @Override - public Answer easySend(final Long hostId, final Command cmd, int timeout) { - try { - final Answer answer = send(hostId, cmd, timeout); - if (answer == null) { - s_logger.warn("send returns null answer"); - return null; - } - - if (!answer.getResult()) { - s_logger.warn("Unable to execute command: " + cmd.toString() + " due to " + answer.getDetails()); - return null; - } - - if (s_logger.isDebugEnabled() && answer.getDetails() != null) 
{ - s_logger.debug("Details from executing " + cmd.getClass().toString() + ": " + answer.getDetails()); - } - - return answer; - - } catch (final AgentUnavailableException e) { - s_logger.warn(e.getMessage()); - return null; - } catch (final OperationTimedoutException e) { - s_logger.warn("Operation timed out: " + e.getMessage()); - return null; - } catch (final Exception e) { - s_logger.warn("Exception while sending", e); - return null; - } - } - - @Override - public Answer send(final Long hostId, final Command cmd) throws AgentUnavailableException, OperationTimedoutException { - return send(hostId, cmd, _wait); - } - - @Override - public Answer[] send(final Long hostId, final Command[] cmds, final boolean stopOnError) throws AgentUnavailableException, OperationTimedoutException { - return send(hostId, cmds, stopOnError, _wait); - } - - @Override - public boolean reconnect(final long hostId) throws AgentUnavailableException { - HostVO host; - - host = _hostDao.findById(hostId); - if (host == null || host.getRemoved() != null) { - s_logger.warn("Unable to find host " + hostId); - return false; - } - - if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert) { - s_logger.info("Unable to disconnect host because it is not in the correct state: host=" + hostId + "; Status=" + host.getStatus()); - return false; - } - - AgentAttache attache = findAttache(hostId); - if (attache == null) { - s_logger.info("Unable to disconnect host because it is not connected to this server: " + hostId); - return false; - } - - disconnect(attache, Event.ShutdownRequested, false); - return true; - } - - @Override - public boolean cancelMaintenance(final long hostId) { - - HostVO host; - host = _hostDao.findById(hostId); - if (host == null || host.getRemoved() != null) { - s_logger.warn("Unable to find host " + hostId); - return true; - } - - if (host.getStatus() != Status.PrepareForMaintenance && host.getStatus() != Status.Maintenance && host.getStatus() != 
Status.ErrorInMaintenance) { - return true; - } - - _haMgr.cancelScheduledMigrations(host); - List vms = _haMgr.findTakenMigrationWork(); - for (VMInstanceVO vm : vms) { - if (vm.getHostId() != null && vm.getHostId() == hostId) { - s_logger.info("Unable to cancel migration because the vm is being migrated: " + vm.toString()); - return false; - } - } - disconnect(hostId, Event.ResetRequested, false); - return true; - } - - @Override - public boolean executeUserRequest(long hostId, Event event) throws AgentUnavailableException { - if (event == Event.MaintenanceRequested) { - return maintain(hostId); - } else if (event == Event.ResetRequested) { - return cancelMaintenance(hostId); - } else if (event == Event.Remove) { - return deleteHost(hostId); - } else if (event == Event.AgentDisconnected) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Received agent disconnect event for host " + hostId); - } - AgentAttache attache = null; - synchronized (_agents) { - attache = _agents.get(hostId); - } - if (attache != null) { - handleDisconnect(attache, Event.AgentDisconnected, false); - } - - return true; - } else if (event == Event.ShutdownRequested) { - return reconnect(hostId); - } - return false; - } - - @Override - public boolean maintain(final long hostId) throws AgentUnavailableException { - HostVO host = _hostDao.findById(hostId); - Status state; - - Answer answer = easySend(hostId, new MaintainCommand()); - if (answer == null || !answer.getResult()) { - s_logger.warn("Unable to put host in maintainance mode: " + hostId); - return false; - } - - // Let's put this guy in maintenance state - do { - host = _hostDao.findById(hostId); - if (host == null) { - s_logger.debug("Unable to find host " + hostId); - return false; - } - state = host.getStatus(); - if (state == Status.Disconnected || state == Status.Updating) { - s_logger.debug("Unable to put host " + hostId + " in matinenance mode because it is currently in " + state.toString()); - throw new 
AgentUnavailableException("Agent is in " + state.toString() + " state. Please wait for it to become Alert state try again.", hostId); - } - } while (!_hostDao.updateStatus(host, Event.MaintenanceRequested, _nodeId)); - - AgentAttache attache; - synchronized (_agents) { - attache = _agents.get(hostId); - if (attache != null) { - attache.setMaintenanceMode(true); - } - } - - if (attache != null) { - // Now cancel all of the commands except for the active one. - attache.cancelAllCommands(Status.PrepareForMaintenance, false); - } - - final Host.Type type = host.getType(); - - if (type == Host.Type.Routing) { - final List vms = _vmDao.listByHostId(hostId); - if (vms.size() == 0) { - return true; - } - - for (final VMInstanceVO vm : vms) { - _haMgr.scheduleMigration(vm); - } - } else { - final List ids = _volDao.findVmsStoredOnHost(hostId); - for (final Long id : ids) { - final VMInstanceVO instance = _vmDao.findById(id); - if (instance != null && (instance.getState() == State.Running || instance.getState() == State.Starting)) { - _haMgr.scheduleStop(instance, host.getId(), false); - } - } - } - - return true; - } - - public boolean checkCIDR(Host.Type type, HostPodVO pod, String serverPrivateIP, String serverPrivateNetmask) { - // Get the CIDR address and CIDR size - String cidrAddress = pod.getCidrAddress(); - long cidrSize = pod.getCidrSize(); - - // If the server's private IP address is not in the same subnet as the - // pod's CIDR, return false - String cidrSubnet = NetUtils.getCidrSubNet(cidrAddress, cidrSize); - String serverSubnet = NetUtils.getSubNet(serverPrivateIP, serverPrivateNetmask); - if (!cidrSubnet.equals(serverSubnet)) { - return false; - } - - // If the server's private netmask is less inclusive than the pod's CIDR - // netmask, return false - String cidrNetmask = NetUtils.getCidrSubNet("255.255.255.255", cidrSize); - long cidrNetmaskNumeric = NetUtils.ip2Long(cidrNetmask); - long serverNetmaskNumeric = NetUtils.ip2Long(serverPrivateNetmask); - if 
(serverNetmaskNumeric > cidrNetmaskNumeric) { - return false; - } - return true; - } - protected void checkCIDR(Host.Type type, HostPodVO pod, DataCenterVO dc, String serverPrivateIP, String serverPrivateNetmask) throws IllegalArgumentException { - // Skip this check for Storage Agents and Console Proxies - if (type == Host.Type.Storage || type == Host.Type.ConsoleProxy) - return; - - // Get the CIDR address and CIDR size - String cidrAddress = pod.getCidrAddress(); - long cidrSize = pod.getCidrSize(); - - // If the server's private IP address is not in the same subnet as the - // pod's CIDR, return false - String cidrSubnet = NetUtils.getCidrSubNet(cidrAddress, cidrSize); - String serverSubnet = NetUtils.getSubNet(serverPrivateIP, serverPrivateNetmask); - if (!cidrSubnet.equals(serverSubnet)) { - s_logger.warn("The private ip address of the server (" + serverPrivateIP + ") is not compatible with the CIDR of pod: " - + pod.getName() + " and zone: " + dc.getName()); - throw new IllegalArgumentException("The private ip address of the server (" + serverPrivateIP + ") is not compatible with the CIDR of pod: " - + pod.getName() + " and zone: " + dc.getName()); - } - - // If the server's private netmask is less inclusive than the pod's CIDR - // netmask, return false - String cidrNetmask = NetUtils.getCidrSubNet("255.255.255.255", cidrSize); - long cidrNetmaskNumeric = NetUtils.ip2Long(cidrNetmask); - long serverNetmaskNumeric = NetUtils.ip2Long(serverPrivateNetmask); - if (serverNetmaskNumeric > cidrNetmaskNumeric) { - throw new IllegalArgumentException("The private ip address of the server (" + serverPrivateIP + ") is not compatible with the CIDR of pod: " - + pod.getName() + " and zone: " + dc.getName()); - } - - } - - public void checkIPConflicts(Host.Type type, HostPodVO pod, DataCenterVO dc, String serverPrivateIP, String serverPrivateNetmask, String serverPublicIP, - String serverPublicNetmask) { - // If the server's private IP is the same as is public IP, this 
host has - // a host-only private network. Don't check for conflicts with the - // private IP address table. - if (serverPrivateIP != serverPublicIP) { - if (!_privateIPAddressDao.mark(dc.getId(), pod.getId(), serverPrivateIP)) { - // If the server's private IP address is already in the - // database, return false - List existingPrivateIPs = _privateIPAddressDao.listByPodIdDcIdIpAddress(pod.getId(), dc.getId(), serverPrivateIP); - - assert existingPrivateIPs.size() <= 1 : " How can we get more than one ip address with " + serverPrivateIP; - if (existingPrivateIPs.size() > 1) { - throw new IllegalArgumentException("The private ip address of the server (" + serverPrivateIP + ") is already in use in pod: " - + pod.getName() + " and zone: " + dc.getName()); - } - if (existingPrivateIPs.size() == 1) { - DataCenterIpAddressVO vo = existingPrivateIPs.get(0); - if (vo.getInstanceId() != null) { - throw new IllegalArgumentException("The private ip address of the server (" + serverPrivateIP + ") is already in use in pod: " - + pod.getName() + " and zone: " + dc.getName()); - } - } - } - } - - if (serverPublicIP != null && !_publicIPAddressDao.mark(dc.getId(), serverPublicIP)) { - // If the server's public IP address is already in the database, - // return false - List existingPublicIPs = _publicIPAddressDao.listByDcIdIpAddress(dc.getId(), serverPublicIP); - if (existingPublicIPs.size() > 0) { - throw new IllegalArgumentException("The public ip address of the server (" + serverPublicIP + ") is already in use in zone: " + dc.getName()); - } - } - } - - public HostVO createHost(final StartupCommand startup, ServerResource resource, Map details, boolean directFirst, List hostTags) throws IllegalArgumentException { - Host.Type type = null; - - if (startup instanceof StartupStorageCommand) { - - StartupStorageCommand ssCmd = ((StartupStorageCommand) startup); - if (ssCmd.getResourceType() == StorageResourceType.SECONDARY_STORAGE) { - type = Host.Type.SecondaryStorage; - if 
(resource != null && resource instanceof DummySecondaryStorageResource){ - resource = null; - } - } else { - type = Host.Type.Storage; - } - final Map hostDetails = ssCmd.getHostDetails(); - if (hostDetails != null) { - if (details != null) { - details.putAll(hostDetails); - } else { - details = hostDetails; - } - } - } else if (startup instanceof StartupRoutingCommand) { - StartupRoutingCommand ssCmd = ((StartupRoutingCommand) startup); - type = Host.Type.Routing; - final Map hostDetails = ssCmd.getHostDetails(); - if (hostDetails != null) { - if (details != null) { - details.putAll(hostDetails); - } else { - details = hostDetails; - } - } - } else if (startup instanceof StartupProxyCommand) { - type = Host.Type.ConsoleProxy; - } else if (startup instanceof StartupRoutingCommand) { - type = Host.Type.Routing; - } else { - assert false : "Did someone add a new Startup command?"; - } - - Long id = null; - HostVO server = _hostDao.findByGuid(startup.getGuid()); - if (server == null) { - server = _hostDao.findByGuid(startup.getGuidWithoutResource()); - } - if (server != null && server.getRemoved() == null) { - id = server.getId(); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Found the host " + id + " by guid: " + startup.getGuid()); - } - if (directFirst) { - s_logger.debug("Old host reconnected as new"); - return null; - } - } else { - server = new HostVO(startup.getGuid()); - } - - server.setDetails(details); - server.setHostTags(hostTags); - - updateHost(server, startup, type, _nodeId); - if (resource != null) { - server.setResource(resource.getClass().getName()); - } - if (id == null) { - /* - * // ignore integrity check for agent-simulator - * if(!"0.0.0.0".equals(startup.getPrivateIpAddress()) && - * !"0.0.0.0".equals(startup.getStorageIpAddress())) { if - * (_hostDao.findByPrivateIpAddressInDataCenter - * (server.getDataCenterId(), startup.getPrivateIpAddress()) != - * null) { throw newIllegalArgumentException( - * "The private ip address is already in 
used: " + - * startup.getPrivateIpAddress()); } - * - * if - * (_hostDao.findByPrivateIpAddressInDataCenter(server.getDataCenterId - * (), startup.getStorageIpAddress()) != null) { throw new - * IllegalArgumentException - * ("The private ip address is already in used: " + - * startup.getStorageIpAddress()); } } - */ - - if (startup instanceof StartupStorageCommand) { - server = _hostDao.persist(server); - id = server.getId(); - } else if (startup instanceof StartupProxyCommand) { - server.setProxyPort(((StartupProxyCommand) startup).getProxyPort()); - server = _hostDao.persist(server); - id = server.getId(); - } else if (startup instanceof StartupRoutingCommand) { - server = _hostDao.persist(server); - id = server.getId(); - } - - s_logger.info("New " + server.getType().toString() + " host connected w/ guid " + startup.getGuid() + " and id is " + id); - } else { - if (!_hostDao.connect(server, _nodeId)) { - throw new CloudRuntimeException("Agent cannot connect because the current state is " + server.getStatus().toString()); - } - s_logger.info("Old " + server.getType().toString() + " host reconnected w/ id =" + id); - } - createCapacityEntry(startup, server); - - return server; - } - - public HostVO createHost(final StartupCommand[] startup, ServerResource resource, Map details, boolean directFirst, List hostTags) throws IllegalArgumentException { - StartupCommand firstCmd = startup[0]; - HostVO result = createHost(firstCmd, resource, details, directFirst, hostTags); - if( result == null ) { - return null; - } - return result; - } - - public AgentAttache handleConnect(final Link link, final StartupCommand[] startup) throws IllegalArgumentException { - HostVO server = createHost(startup, null, null, false, null); - if ( server == null ) { - return null; - } - long id = server.getId(); - - AgentAttache attache = createAttache(id, server, link); - - attache = notifyMonitorsOfConnection(attache, startup); - - return attache; - } - - public AgentAttache findAgent(long 
hostId) { - synchronized (_agents) { - return _agents.get(hostId); - } - } - - protected AgentAttache createAttache(long id, HostVO server, Link link) { - s_logger.debug("Adding link for " + id); - final AgentAttache attache = new ConnectedAgentAttache(id, link, server.getStatus() == Status.Maintenance - || server.getStatus() == Status.ErrorInMaintenance || server.getStatus() == Status.PrepareForMaintenance); - link.attach(attache); - AgentAttache old = null; - synchronized (_agents) { - old = _agents.get(id); - _agents.put(id, attache); - } - if( old != null ) { - old.disconnect(Status.Removed); - } - return attache; - } - - protected AgentAttache createAttache(long id, HostVO server, ServerResource resource) { - s_logger.debug("Adding directly connect host for " + id); - if (resource instanceof DummySecondaryStorageResource) { - return new DummyAttache(id, false); - } - final DirectAgentAttache attache = new DirectAgentAttache(id, resource, server.getStatus() == Status.Maintenance - || server.getStatus() == Status.ErrorInMaintenance || server.getStatus() == Status.PrepareForMaintenance, this); - AgentAttache old = null; - synchronized (_agents) { - old = _agents.get(id); - _agents.put(id, attache); - } - if( old != null ) { - old.disconnect(Status.Removed); - } - return attache; - } - - @Override - public boolean maintenanceFailed(long hostId) { - HostVO host = _hostDao.findById(hostId); - if( host == null ) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Cant not find host " + hostId); - } - return false; - } else { - return _hostDao.updateStatus(host, Event.UnableToMigrate, _nodeId); - } - } - - @Override - public void updateHost(long hostId, long guestOSCategoryId){ - GuestOSCategoryVO guestOSCategory = _guestOSCategoryDao.findById(guestOSCategoryId); - Map hostDetails = _hostDetailsDao.findDetails(hostId); - - if (guestOSCategory != null) { - // Save a new entry for guest.os.category.id - hostDetails.put("guest.os.category.id", 
String.valueOf(guestOSCategory.getId())); - } else { - // Delete any existing entry for guest.os.category.id - hostDetails.remove("guest.os.category.id"); - } - - _hostDetailsDao.persist(hostId, hostDetails); - - } - - protected void updateHost(final HostVO host, final StartupCommand startup, final Host.Type type, final long msId) throws IllegalArgumentException { - s_logger.debug("updateHost() called"); - - String dataCenter = startup.getDataCenter(); - String pod = startup.getPod(); - String cluster = startup.getCluster(); - - if (pod != null && dataCenter != null && pod.equalsIgnoreCase("default") && dataCenter.equalsIgnoreCase("default")) { - List pods = _podDao.listAll(); - for (HostPodVO hpv : pods) { - if (checkCIDR(type, hpv, startup.getPrivateIpAddress(), startup.getPrivateNetmask())) { - pod = hpv.getName(); - dataCenter = _dcDao.findById(hpv.getDataCenterId()).getName(); - break; - } - } - } - long dcId = -1; - DataCenterVO dc = _dcDao.findByName(dataCenter); - if (dc == null) { - try { - dcId = Long.parseLong(dataCenter); - dc = _dcDao.findById(dcId); - } catch (final NumberFormatException e) { - } - } - if (dc == null) { - throw new IllegalArgumentException("Host " + startup.getPrivateIpAddress() + " sent incorrect data center: " + dataCenter); - } - dcId = dc.getId(); - - HostPodVO p = _podDao.findByName(pod, dcId); - if (p == null) { - try { - final long podId = Long.parseLong(pod); - p = _podDao.findById(podId); - } catch (final NumberFormatException e) { - } - } - Long podId = null; - if (p == null) { - if (type != Host.Type.SecondaryStorage) { - - /* - * s_logger.info("Unable to find the pod so we are creating one." 
- * ); p = createPod(pod, dcId, startup.getPrivateIpAddress(), - * NetUtils.getCidrSize(startup.getPrivateNetmask())); podId = - * p.getId(); - */ - s_logger.error("Host " + startup.getPrivateIpAddress() + " sent incorrect pod: " + pod + " in " + dataCenter); - throw new IllegalArgumentException("Host " + startup.getPrivateIpAddress() + " sent incorrect pod: " + pod + " in " + dataCenter); - } - } else { - podId = p.getId(); - } - - Long clusterId = null; - if (cluster != null) { - try { - clusterId = Long.valueOf(cluster); - } catch (NumberFormatException e) { - ClusterVO c = _clusterDao.findBy(cluster, podId); - if (c == null) { - c = new ClusterVO(dcId, podId, cluster); - c = _clusterDao.persist(c); - } - clusterId = c.getId(); - } - } - - if (type == Host.Type.Routing) { - StartupRoutingCommand scc = (StartupRoutingCommand) startup; - Hypervisor.Type hypervisorType = scc.getHypervisorType(); - boolean doCidrCheck = true; - - // If this command is from the agent simulator, don't do the CIDR - // check - if (scc.getAgentTag() != null && startup.getAgentTag().equalsIgnoreCase("agent-simulator")) - doCidrCheck = false; - - // If this command is from a KVM agent, or from an agent that has a - // null hypervisor type, don't do the CIDR check - if (hypervisorType == null || hypervisorType == Hypervisor.Type.KVM) - doCidrCheck = false; - - if (doCidrCheck) - s_logger.info("Host: " + host.getName() + " connected with hypervisor type: " + hypervisorType + ". Checking CIDR..."); - else - s_logger.info("Host: " + host.getName() + " connected with hypervisor type: " + hypervisorType + ". 
Skipping CIDR check..."); - - if (doCidrCheck) { - checkCIDR(type, p, dc, scc.getPrivateIpAddress(), scc.getPrivateNetmask()); - } - - // Check if the private/public IPs of the server are already in the - // private/public IP address tables - checkIPConflicts(type, p, dc, scc.getPrivateIpAddress(), scc.getPublicIpAddress(), scc.getPublicIpAddress(), scc.getPublicNetmask()); - } - - host.setDataCenterId(dc.getId()); - host.setPodId(podId); - host.setClusterId(clusterId); - host.setPrivateIpAddress(startup.getPrivateIpAddress()); - host.setPrivateNetmask(startup.getPrivateNetmask()); - host.setPrivateMacAddress(startup.getPrivateMacAddress()); - host.setPublicIpAddress(startup.getPublicIpAddress()); - host.setPublicMacAddress(startup.getPublicMacAddress()); - host.setPublicNetmask(startup.getPublicNetmask()); - host.setStorageIpAddress(startup.getStorageIpAddress()); - host.setStorageMacAddress(startup.getStorageMacAddress()); - host.setStorageNetmask(startup.getStorageNetmask()); - host.setVersion(startup.getVersion()); - host.setName(startup.getName()); - host.setType(type); - host.setManagementServerId(msId); - host.setStorageUrl(startup.getIqn()); - host.setLastPinged(System.currentTimeMillis() >> 10); - if (startup instanceof StartupRoutingCommand) { - final StartupRoutingCommand scc = (StartupRoutingCommand) startup; - host.setCaps(scc.getCapabilities()); - host.setCpus(scc.getCpus()); - host.setTotalMemory(scc.getMemory()); - host.setSpeed(scc.getSpeed()); - Hypervisor.Type hyType = scc.getHypervisorType(); - if (hyType == null) { - host.setHypervisorType(Hypervisor.Type.Xen); - } else { - host.setHypervisorType(hyType); - } - } else if(startup instanceof StartupStorageCommand) { - final StartupStorageCommand ssc = (StartupStorageCommand) startup; - host.setParent(ssc.getParent()); - host.setTotalSize(ssc.getTotalSize()); - host.setHypervisorType(Hypervisor.Type.None); - if (ssc.getNfsShare() != null) { - host.setStorageUrl(ssc.getNfsShare()); - } - } - if 
(startup.getStorageIpAddressDeux() != null) { - host.setStorageIpAddressDeux(startup.getStorageIpAddressDeux()); - host.setStorageMacAddressDeux(startup.getStorageMacAddressDeux()); - host.setStorageNetmaskDeux(startup.getStorageNetmaskDeux()); - } - - } - - // create capacity entries if none exist for this server - private void createCapacityEntry(final StartupCommand startup, HostVO server) { - SearchCriteria capacitySC = _capacityDao.createSearchCriteria(); - capacitySC.addAnd("hostOrPoolId", SearchCriteria.Op.EQ, server.getId()); - capacitySC.addAnd("dataCenterId", SearchCriteria.Op.EQ, server.getDataCenterId()); - capacitySC.addAnd("podId", SearchCriteria.Op.EQ, server.getPodId()); - List capacities = _capacityDao.search(capacitySC, null); - - // remove old entries, we'll recalculate them anyway - if ((capacities != null) && !capacities.isEmpty()) { - for (CapacityVO capacity : capacities) { - if ( capacity.getCapacityType() != CapacityVO.CAPACITY_TYPE_SECONDARY_STORAGE){ // Not allowing secondary storage to be deleted Bug# 7391 - _capacityDao.remove(capacity.getId()); - } - } - } - - if (startup instanceof StartupStorageCommand) { - StartupStorageCommand ssCmd = (StartupStorageCommand) startup; - if (ssCmd.getResourceType() == StorageResourceType.STORAGE_HOST) { - CapacityVO capacity = new CapacityVO(server.getId(), server.getDataCenterId(), server.getPodId(), 0L, server.getTotalSize() * _overProvisioningFactor, - CapacityVO.CAPACITY_TYPE_STORAGE_ALLOCATED); - _capacityDao.persist(capacity); - } - } else if (startup instanceof StartupRoutingCommand) { - - CapacityVO capacity = new CapacityVO(server.getId(), server.getDataCenterId(), server.getPodId(), 0L, - server.getTotalMemory(), CapacityVO.CAPACITY_TYPE_MEMORY); - _capacityDao.persist(capacity); - - capacity = new CapacityVO(server.getId(), server.getDataCenterId(), server.getPodId(), 0L, (long)(server.getCpus().longValue() - * server.getSpeed().longValue()*_cpuOverProvisioningFactor), 
CapacityVO.CAPACITY_TYPE_CPU); - _capacityDao.persist(capacity); - } - } - - protected void upgradeAgent(final Link link, final byte[] request, final String reason) { - - if (reason == UnsupportedVersionException.IncompatibleVersion) { - final UpgradeResponse response = new UpgradeResponse(request, _upgradeMgr.getAgentUrl()); - try { - s_logger.info("Asking for the agent to update due to incompatible version: " + response.toString()); - link.send(response.toBytes()); - } catch (final ClosedChannelException e) { - s_logger.warn("Unable to send response due to connection closed: " + response.toString()); - } - return; - } - - assert (reason == UnsupportedVersionException.UnknownVersion) : "Unknown reason: " + reason; - final UpgradeResponse response = new UpgradeResponse(request, _upgradeMgr.getAgentUrl()); - try { - s_logger.info("Asking for the agent to update due to unknown version: " + response.toString()); - link.send(response.toBytes()); - } catch (final ClosedChannelException e) { - s_logger.warn("Unable to send response due to connection closed: " + response.toString()); - } - } - - protected class SimulateStartTask implements Runnable { - ServerResource resource; - Map details; - long id; - ActionDelegate actionDelegate; - - public SimulateStartTask(long id, ServerResource resource, Map details, ActionDelegate actionDelegate) { - this.id = id; - this.resource = resource; - this.details = details; - this.actionDelegate = actionDelegate; - } - - @Override - public void run() { - AgentAttache at = null; - try { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Simulating start for resource " + resource.getName() + " id " + id); - } - at = simulateStart(resource, details, false, null); - } catch (Exception e) { - - s_logger.warn("Unable to simulate start on resource " + id + " name " + resource.getName(), e); - } finally { - StackMaid.current().exitCleanup(); - if ( at == null ) { - HostVO host = _hostDao.findById(id); - host.setManagementServerId(null); - 
_hostDao.update(id, host); - } - } - } - } - - public class AgentHandler extends Task { - public AgentHandler(Task.Type type, Link link, byte[] data) { - super(type, link, data); - } - - protected void processRequest(final Link link, final Request request) { - AgentAttache attache = (AgentAttache) link.attachment(); - final Command[] cmds = request.getCommands(); - Command cmd = cmds[0]; - boolean logD = true; - - Response response = null; - if (attache == null) { - s_logger.debug("Processing sequence " + request.getSequence() + ": Processing " + request.toString()); - if (!(cmd instanceof StartupCommand)) { - s_logger.warn("Throwing away a request because it came through as the first command on a connect: " + request.toString()); - return; - } - StartupCommand startup = (StartupCommand) cmd; - if ((_upgradeMgr.registerForUpgrade(-1, startup.getVersion()) == UpgradeManager.State.RequiresUpdate) && (_upgradeMgr.getAgentUrl() != null)) { - final UpgradeCommand upgrade = new UpgradeCommand(_upgradeMgr.getAgentUrl()); - final Request req = new Request(1, -1, -1, new Command[] { upgrade }, true, true); - s_logger.info("Agent requires upgrade: " + req.toString()); - try { - link.send(req.toBytes()); - } catch (ClosedChannelException e) { - s_logger.warn("Unable to tell agent it should update."); - } - return; - } - try { - StartupCommand[] startups = new StartupCommand[cmds.length]; - for (int i = 0; i < cmds.length; i++) - startups[i] = (StartupCommand) cmds[i]; - attache = handleConnect(link, startups); - } catch (final IllegalArgumentException e) { - _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, 0, new Long(0), "Agent from " + startup.getPrivateIpAddress() - + " is unable to connect due to " + e.getMessage(), "Agent from " + startup.getPrivateIpAddress() + " is unable to connect with " - + request.toString() + " because of " + e.getMessage()); - s_logger.warn("Unable to create attache for agent: " + request.toString(), e); - response = new Response(request, new 
StartupAnswer((StartupCommand) cmd, e.getMessage()), _nodeId, -1); - } catch (final CloudRuntimeException e) { - _alertMgr.sendAlert(AlertManager.ALERT_TYPE_HOST, 0, new Long(0), "Agent from " + startup.getPrivateIpAddress() - + " is unable to connect due to " + e.getMessage(), "Agent from " + startup.getPrivateIpAddress() + " is unable to connect with " - + request.toString() + " because of " + e.getMessage()); - s_logger.warn("Unable to create attache for agent: " + request.toString(), e); - } - if (attache == null) { - if (response == null) { - s_logger.warn("Unable to create attache for agent: " + request.toString()); - response = new Response(request, new StartupAnswer((StartupCommand) request.getCommand(), "Unable to register this agent"), _nodeId, -1); - } - try { - link.send(response.toBytes(), true); - } catch (final ClosedChannelException e) { - s_logger.warn("Response was not sent: " + response.toString()); - } - return; - } - } - - final long hostId = attache.getId(); - - if (s_logger.isDebugEnabled()) { - if (cmd instanceof PingRoutingCommand) { - final PingRoutingCommand ping = (PingRoutingCommand) cmd; - if (ping.getNewStates().size() > 0) { - s_logger.debug("SeqA " + hostId + "-" + request.getSequence() + ": Processing " + request.toString()); - } else { - logD = false; - s_logger.debug("Ping from " + hostId); - s_logger.trace("SeqA " + hostId + "-" + request.getSequence() + ": Processing " + request.toString()); - } - } else if (cmd instanceof PingCommand) { - logD = false; - s_logger.debug("Ping from " + hostId); - s_logger.trace("SeqA " + attache.getId() + "-" + request.getSequence() + ": Processing " + request.toString()); - } else { - s_logger.debug("SeqA " + attache.getId() + "-" + request.getSequence() + ": Processing " + request.toString()); - } - } - - final Answer[] answers = new Answer[cmds.length]; - for (int i = 0; i < cmds.length; i++) { - cmd = cmds[i]; - Answer answer = null; - try { - if (cmd instanceof StartupRoutingCommand) { - 
final StartupRoutingCommand startup = (StartupRoutingCommand) cmd; - answer = new StartupAnswer(startup, attache.getId(), getPingInterval()); - } else if (cmd instanceof StartupProxyCommand) { - final StartupProxyCommand startup = (StartupProxyCommand) cmd; - answer = new StartupAnswer(startup, attache.getId(), getPingInterval()); - } else if (cmd instanceof StartupStorageCommand) { - final StartupStorageCommand startup = (StartupStorageCommand) cmd; - answer = new StartupAnswer(startup, attache.getId(), getPingInterval()); - } else if (cmd instanceof ShutdownCommand) { - final ShutdownCommand shutdown = (ShutdownCommand) cmd; - final String reason = shutdown.getReason(); - s_logger.info("Host " + attache.getId() + " has informed us that it is shutting down with reason " + reason + " and detail " - + shutdown.getDetail()); - if (reason.equals(ShutdownCommand.Update)) { - disconnect(attache, Event.UpdateNeeded, false); - } else if (reason.equals(ShutdownCommand.Requested)) { - disconnect(attache, Event.ShutdownRequested, false); - } - return; - } else if(cmd instanceof AgentControlCommand) { - answer = handleControlCommand(attache, (AgentControlCommand)cmd); - } else { - handleCommands(attache, request.getSequence(), new Command[] { cmd }); - if (cmd instanceof PingCommand) { - long cmdHostId = ((PingCommand) cmd).getHostId(); - - // if the router is sending a ping, verify the - // gateway was pingable - if (cmd instanceof PingRoutingCommand) { - boolean gatewayAccessible = ((PingRoutingCommand) cmd).isGatewayAccessible(); - HostVO host = _hostDao.findById(Long.valueOf(cmdHostId)); - if (!gatewayAccessible) { - // alert that host lost connection to - // gateway (cannot ping the default route) - DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); - HostPodVO podVO = _podDao.findById(host.getPodId()); - String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() - + ", pod: " + podVO.getName(); - - 
_alertMgr.sendAlert(AlertManager.ALERT_TYPE_ROUTING, host.getDataCenterId(), host.getPodId(), - "Host lost connection to gateway, " + hostDesc, "Host [" + hostDesc - + "] lost connection to gateway (default route) and is possibly having network connection issues."); - } else { - _alertMgr.clearAlert(AlertManager.ALERT_TYPE_ROUTING, host.getDataCenterId(), host.getPodId()); - } - } - answer = new PingAnswer((PingCommand) cmd); - } else if (cmd instanceof ReadyAnswer) { - HostVO host = _hostDao.findById(attache.getId()); - if( host == null ) { - if (s_logger.isDebugEnabled()) { - s_logger.debug("Cant not find host " + attache.getId()); - } - } else { - s_logger.info("Host " + attache.getId() + " is now ready to processing commands."); - _hostDao.updateStatus(host, Event.Ready, _nodeId); - } - } else { - answer = new Answer(cmd); - } - } - } catch (final Throwable th) { - s_logger.warn("Caught: ", th); - answer = new Answer(cmd, false, th.getMessage()); - } - answers[i] = answer; - } - - response = new Response(request, answers, _nodeId, attache.getId()); - if (s_logger.isDebugEnabled()) { - if (logD) { - s_logger.debug("SeqA " + attache.getId() + "-" + response.getSequence() + ": Sending " + response.toString()); - } else { - s_logger.trace("SeqA " + attache.getId() + "-" + response.getSequence() + ": Sending " + response.toString()); - } - } - try { - link.send(response.toBytes()); - } catch (final ClosedChannelException e) { - s_logger.warn("Unable to send response because connection is closed: " + response.toString()); - } - } - - protected void processResponse(final Link link, final Response response) { - final AgentAttache attache = (AgentAttache) link.attachment(); - if (attache == null) { - s_logger.warn("Unable to process: " + response.toString()); - } - - if (!attache.processAnswers(response.getSequence(), response)) { - s_logger.info("Host " + attache.getId() + " - Seq " + response.getSequence() + ": Response is not processed: " + response.toString()); - } 
- } - - @Override - protected void doTask(final Task task) throws Exception { - Transaction txn = Transaction.open(Transaction.CLOUD_DB); - try { - final Type type = task.getType(); - if (type == Task.Type.DATA) { - final byte[] data = task.getData(); - try { - final Request event = Request.parse(data); - if (event instanceof Response) { - processResponse(task.getLink(), (Response) event); - } else { - processRequest(task.getLink(), event); - } - } catch (final UnsupportedVersionException e) { - s_logger.warn(e.getMessage()); - upgradeAgent(task.getLink(), data, e.getReason()); - } - } else if (type == Task.Type.CONNECT) { - } else if (type == Task.Type.DISCONNECT) { - final Link link = task.getLink(); - final AgentAttache attache = (AgentAttache) link.attachment(); - if (attache != null) { - disconnect(attache, Event.AgentDisconnected, true); - } else { - s_logger.info("Connection from " + link.getIpAddress() + " closed but no cleanup was done."); - link.close(); - link.terminated(); - } - } - } finally { - StackMaid.current().exitCleanup(); - txn.close(); - } - } - } - - protected AgentManagerImpl() { - } + } + + @Override + public String getName() { + return _name; + } + + protected class DisconnectTask implements Runnable { + AgentAttache _attache; + Status.Event _event; + boolean _investigate; + + DisconnectTask(final AgentAttache attache, final Status.Event event, + final boolean investigate) { + _attache = attache; + _event = event; + _investigate = investigate; + } + + @Override + public void run() { + try { + handleDisconnect(_attache, _event, _investigate); + } catch (final Exception e) { + s_logger.error("Exception caught while handling disconnect: ", + e); + } finally { + StackMaid.current().exitCleanup(); + } + } + } + + @Override + public Answer easySend(final Long hostId, final Command cmd) { + return easySend(hostId, cmd, _wait); + } + + @Override + public Answer easySend(final Long hostId, final Command cmd, int timeout) { + try { + final Answer 
answer = send(hostId, cmd, timeout); + if (answer == null) { + s_logger.warn("send returns null answer"); + return null; + } + + if (!answer.getResult()) { + s_logger.warn("Unable to execute command: " + cmd.toString() + + " due to " + answer.getDetails()); + return null; + } + + if (s_logger.isDebugEnabled() && answer.getDetails() != null) { + s_logger.debug("Details from executing " + + cmd.getClass().toString() + ": " + + answer.getDetails()); + } + + return answer; + + } catch (final AgentUnavailableException e) { + s_logger.warn(e.getMessage()); + return null; + } catch (final OperationTimedoutException e) { + s_logger.warn("Operation timed out: " + e.getMessage()); + return null; + } catch (final Exception e) { + s_logger.warn("Exception while sending", e); + return null; + } + } + + @Override + public Answer send(final Long hostId, final Command cmd) + throws AgentUnavailableException, OperationTimedoutException { + return send(hostId, cmd, _wait); + } + + @Override + public Answer[] send(final Long hostId, final Command[] cmds, + final boolean stopOnError) throws AgentUnavailableException, + OperationTimedoutException { + return send(hostId, cmds, stopOnError, _wait); + } + + @Override + public boolean reconnect(final long hostId) + throws AgentUnavailableException { + HostVO host; + + host = _hostDao.findById(hostId); + if (host == null || host.getRemoved() != null) { + s_logger.warn("Unable to find host " + hostId); + return false; + } + + if (host.getStatus() != Status.Up && host.getStatus() != Status.Alert) { + s_logger.info("Unable to disconnect host because it is not in the correct state: host=" + + hostId + "; Status=" + host.getStatus()); + return false; + } + + AgentAttache attache = findAttache(hostId); + if (attache == null) { + s_logger.info("Unable to disconnect host because it is not connected to this server: " + + hostId); + return false; + } + + disconnect(attache, Event.ShutdownRequested, false); + return true; + } + + @Override + public 
boolean cancelMaintenance(final long hostId) { + + HostVO host; + host = _hostDao.findById(hostId); + if (host == null || host.getRemoved() != null) { + s_logger.warn("Unable to find host " + hostId); + return true; + } + + if (host.getStatus() != Status.PrepareForMaintenance + && host.getStatus() != Status.Maintenance + && host.getStatus() != Status.ErrorInMaintenance) { + return true; + } + + _haMgr.cancelScheduledMigrations(host); + List vms = _haMgr.findTakenMigrationWork(); + for (VMInstanceVO vm : vms) { + if (vm.getHostId() != null && vm.getHostId() == hostId) { + s_logger.info("Unable to cancel migration because the vm is being migrated: " + + vm.toString()); + return false; + } + } + disconnect(hostId, Event.ResetRequested, false); + return true; + } + + @Override + public boolean executeUserRequest(long hostId, Event event) + throws AgentUnavailableException { + if (event == Event.MaintenanceRequested) { + return maintain(hostId); + } else if (event == Event.ResetRequested) { + return cancelMaintenance(hostId); + } else if (event == Event.Remove) { + return deleteHost(hostId); + } else if (event == Event.AgentDisconnected) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Received agent disconnect event for host " + + hostId); + } + AgentAttache attache = null; + synchronized (_agents) { + attache = _agents.get(hostId); + } + if (attache != null) { + handleDisconnect(attache, Event.AgentDisconnected, false); + } + + return true; + } else if (event == Event.ShutdownRequested) { + return reconnect(hostId); + } + return false; + } + + @Override + public boolean maintain(final long hostId) throws AgentUnavailableException { + HostVO host = _hostDao.findById(hostId); + Status state; + + Answer answer = easySend(hostId, new MaintainCommand()); + if (answer == null || !answer.getResult()) { + s_logger.warn("Unable to put host in maintainance mode: " + hostId); + return false; + } + + // Let's put this guy in maintenance state + do { + host = 
_hostDao.findById(hostId); + if (host == null) { + s_logger.debug("Unable to find host " + hostId); + return false; + } + state = host.getStatus(); + if (state == Status.Disconnected || state == Status.Updating) { + s_logger.debug("Unable to put host " + hostId + + " in matinenance mode because it is currently in " + + state.toString()); + throw new AgentUnavailableException( + "Agent is in " + + state.toString() + + " state. Please wait for it to become Alert state try again.", + hostId); + } + } while (!_hostDao.updateStatus(host, Event.MaintenanceRequested, + _nodeId)); + + AgentAttache attache; + synchronized (_agents) { + attache = _agents.get(hostId); + if (attache != null) { + attache.setMaintenanceMode(true); + } + } + + if (attache != null) { + // Now cancel all of the commands except for the active one. + attache.cancelAllCommands(Status.PrepareForMaintenance, false); + } + + final Host.Type type = host.getType(); + + if (type == Host.Type.Routing) { + final List vms = _vmDao.listByHostId(hostId); + if (vms.size() == 0) { + return true; + } + + for (final VMInstanceVO vm : vms) { + _haMgr.scheduleMigration(vm); + } + } else { + final List ids = _volDao.findVmsStoredOnHost(hostId); + for (final Long id : ids) { + final VMInstanceVO instance = _vmDao.findById(id); + if (instance != null + && (instance.getState() == State.Running || instance + .getState() == State.Starting)) { + _haMgr.scheduleStop(instance, host.getId(), false); + } + } + } + + return true; + } + + public boolean checkCIDR(Host.Type type, HostPodVO pod, + String serverPrivateIP, String serverPrivateNetmask) { + // Get the CIDR address and CIDR size + String cidrAddress = pod.getCidrAddress(); + long cidrSize = pod.getCidrSize(); + + // If the server's private IP address is not in the same subnet as the + // pod's CIDR, return false + String cidrSubnet = NetUtils.getCidrSubNet(cidrAddress, cidrSize); + String serverSubnet = NetUtils.getSubNet(serverPrivateIP, + serverPrivateNetmask); + if 
(!cidrSubnet.equals(serverSubnet)) { + return false; + } + + // If the server's private netmask is less inclusive than the pod's CIDR + // netmask, return false + String cidrNetmask = NetUtils + .getCidrSubNet("255.255.255.255", cidrSize); + long cidrNetmaskNumeric = NetUtils.ip2Long(cidrNetmask); + long serverNetmaskNumeric = NetUtils.ip2Long(serverPrivateNetmask); + if (serverNetmaskNumeric > cidrNetmaskNumeric) { + return false; + } + return true; + } + + protected void checkCIDR(Host.Type type, HostPodVO pod, DataCenterVO dc, + String serverPrivateIP, String serverPrivateNetmask) + throws IllegalArgumentException { + // Skip this check for Storage Agents and Console Proxies + if (type == Host.Type.Storage || type == Host.Type.ConsoleProxy) + return; + + // Get the CIDR address and CIDR size + String cidrAddress = pod.getCidrAddress(); + long cidrSize = pod.getCidrSize(); + + // If the server's private IP address is not in the same subnet as the + // pod's CIDR, return false + String cidrSubnet = NetUtils.getCidrSubNet(cidrAddress, cidrSize); + String serverSubnet = NetUtils.getSubNet(serverPrivateIP, + serverPrivateNetmask); + if (!cidrSubnet.equals(serverSubnet)) { + s_logger.warn("The private ip address of the server (" + + serverPrivateIP + + ") is not compatible with the CIDR of pod: " + + pod.getName() + " and zone: " + dc.getName()); + throw new IllegalArgumentException( + "The private ip address of the server (" + serverPrivateIP + + ") is not compatible with the CIDR of pod: " + + pod.getName() + " and zone: " + dc.getName()); + } + + // If the server's private netmask is less inclusive than the pod's CIDR + // netmask, return false + String cidrNetmask = NetUtils + .getCidrSubNet("255.255.255.255", cidrSize); + long cidrNetmaskNumeric = NetUtils.ip2Long(cidrNetmask); + long serverNetmaskNumeric = NetUtils.ip2Long(serverPrivateNetmask); + if (serverNetmaskNumeric > cidrNetmaskNumeric) { + throw new IllegalArgumentException( + "The private ip address of 
the server (" + serverPrivateIP + + ") is not compatible with the CIDR of pod: " + + pod.getName() + " and zone: " + dc.getName()); + } + + } + + public void checkIPConflicts(Host.Type type, HostPodVO pod, + DataCenterVO dc, String serverPrivateIP, + String serverPrivateNetmask, String serverPublicIP, + String serverPublicNetmask) { + // If the server's private IP is the same as is public IP, this host has + // a host-only private network. Don't check for conflicts with the + // private IP address table. + if (serverPrivateIP != serverPublicIP) { + if (!_privateIPAddressDao.mark(dc.getId(), pod.getId(), + serverPrivateIP)) { + // If the server's private IP address is already in the + // database, return false + List existingPrivateIPs = _privateIPAddressDao + .listByPodIdDcIdIpAddress(pod.getId(), dc.getId(), + serverPrivateIP); + + assert existingPrivateIPs.size() <= 1 : " How can we get more than one ip address with " + + serverPrivateIP; + if (existingPrivateIPs.size() > 1) { + throw new IllegalArgumentException( + "The private ip address of the server (" + + serverPrivateIP + + ") is already in use in pod: " + + pod.getName() + " and zone: " + + dc.getName()); + } + if (existingPrivateIPs.size() == 1) { + DataCenterIpAddressVO vo = existingPrivateIPs.get(0); + if (vo.getInstanceId() != null) { + throw new IllegalArgumentException( + "The private ip address of the server (" + + serverPrivateIP + + ") is already in use in pod: " + + pod.getName() + " and zone: " + + dc.getName()); + } + } + } + } + + if (serverPublicIP != null + && !_publicIPAddressDao.mark(dc.getId(), serverPublicIP)) { + // If the server's public IP address is already in the database, + // return false + List existingPublicIPs = _publicIPAddressDao + .listByDcIdIpAddress(dc.getId(), serverPublicIP); + if (existingPublicIPs.size() > 0) { + throw new IllegalArgumentException( + "The public ip address of the server (" + + serverPublicIP + + ") is already in use in zone: " + + dc.getName()); + } + 
} + } + + public HostVO createHost(final StartupCommand startup, + ServerResource resource, Map details, + boolean directFirst, List hostTags) + throws IllegalArgumentException { + Host.Type type = null; + + if (startup instanceof StartupStorageCommand) { + + StartupStorageCommand ssCmd = ((StartupStorageCommand) startup); + if (ssCmd.getResourceType() == StorageResourceType.SECONDARY_STORAGE) { + type = Host.Type.SecondaryStorage; + if (resource != null + && resource instanceof DummySecondaryStorageResource) { + resource = null; + } + } else { + type = Host.Type.Storage; + } + final Map hostDetails = ssCmd.getHostDetails(); + if (hostDetails != null) { + if (details != null) { + details.putAll(hostDetails); + } else { + details = hostDetails; + } + } + } else if (startup instanceof StartupRoutingCommand) { + StartupRoutingCommand ssCmd = ((StartupRoutingCommand) startup); + type = Host.Type.Routing; + final Map hostDetails = ssCmd.getHostDetails(); + if (hostDetails != null) { + if (details != null) { + details.putAll(hostDetails); + } else { + details = hostDetails; + } + } + } else if (startup instanceof StartupProxyCommand) { + type = Host.Type.ConsoleProxy; + } else if (startup instanceof StartupRoutingCommand) { + type = Host.Type.Routing; + } else { + assert false : "Did someone add a new Startup command?"; + } + + Long id = null; + HostVO server = _hostDao.findByGuid(startup.getGuid()); + if (server == null) { + server = _hostDao.findByGuid(startup.getGuidWithoutResource()); + } + if (server != null && server.getRemoved() == null) { + id = server.getId(); + if (s_logger.isDebugEnabled()) { + s_logger.debug("Found the host " + id + " by guid: " + + startup.getGuid()); + } + if (directFirst) { + s_logger.debug("Old host reconnected as new"); + return null; + } + } else { + server = new HostVO(startup.getGuid()); + } + + server.setDetails(details); + server.setHostTags(hostTags); + + updateHost(server, startup, type, _nodeId); + if (resource != null) { + 
server.setResource(resource.getClass().getName()); + } + if (id == null) { + /* + * // ignore integrity check for agent-simulator + * if(!"0.0.0.0".equals(startup.getPrivateIpAddress()) && + * !"0.0.0.0".equals(startup.getStorageIpAddress())) { if + * (_hostDao.findByPrivateIpAddressInDataCenter + * (server.getDataCenterId(), startup.getPrivateIpAddress()) != + * null) { throw newIllegalArgumentException( + * "The private ip address is already in used: " + + * startup.getPrivateIpAddress()); } + * + * if + * (_hostDao.findByPrivateIpAddressInDataCenter(server.getDataCenterId + * (), startup.getStorageIpAddress()) != null) { throw new + * IllegalArgumentException + * ("The private ip address is already in used: " + + * startup.getStorageIpAddress()); } } + */ + + if (startup instanceof StartupStorageCommand) { + server = _hostDao.persist(server); + id = server.getId(); + } else if (startup instanceof StartupProxyCommand) { + server.setProxyPort(((StartupProxyCommand) startup) + .getProxyPort()); + server = _hostDao.persist(server); + id = server.getId(); + } else if (startup instanceof StartupRoutingCommand) { + server = _hostDao.persist(server); + id = server.getId(); + } + + s_logger.info("New " + server.getType().toString() + + " host connected w/ guid " + startup.getGuid() + + " and id is " + id); + } else { + if (!_hostDao.connect(server, _nodeId)) { + throw new CloudRuntimeException( + "Agent cannot connect because the current state is " + + server.getStatus().toString()); + } + s_logger.info("Old " + server.getType().toString() + + " host reconnected w/ id =" + id); + } + createCapacityEntry(startup, server); + + return server; + } + + public HostVO createHost(final StartupCommand[] startup, + ServerResource resource, Map details, + boolean directFirst, List hostTags) + throws IllegalArgumentException { + StartupCommand firstCmd = startup[0]; + HostVO result = createHost(firstCmd, resource, details, directFirst, + hostTags); + if (result == null) { + return 
null; + } + return result; + } + + public AgentAttache handleConnect(final Link link, + final StartupCommand[] startup) throws IllegalArgumentException { + HostVO server = createHost(startup, null, null, false, null); + if (server == null) { + return null; + } + long id = server.getId(); + + AgentAttache attache = createAttache(id, server, link); + + attache = notifyMonitorsOfConnection(attache, startup); + + return attache; + } + + public AgentAttache findAgent(long hostId) { + synchronized (_agents) { + return _agents.get(hostId); + } + } + + protected AgentAttache createAttache(long id, HostVO server, Link link) { + s_logger.debug("Adding link for " + id); + final AgentAttache attache = new ConnectedAgentAttache(id, link, + server.getStatus() == Status.Maintenance + || server.getStatus() == Status.ErrorInMaintenance + || server.getStatus() == Status.PrepareForMaintenance); + link.attach(attache); + AgentAttache old = null; + synchronized (_agents) { + old = _agents.put(id, attache); + } + if (old != null) { + old.disconnect(Status.Removed); + } + return attache; + } + + protected AgentAttache createAttache(long id, HostVO server, + ServerResource resource) { + s_logger.debug("Adding directly connect host for " + id); + if (resource instanceof DummySecondaryStorageResource) { + return new DummyAttache(id, false); + } + final DirectAgentAttache attache = new DirectAgentAttache(id, resource, + server.getStatus() == Status.Maintenance + || server.getStatus() == Status.ErrorInMaintenance + || server.getStatus() == Status.PrepareForMaintenance, + this); + AgentAttache old = null; + synchronized (_agents) { + old = _agents.put(id, attache); + } + if (old != null) { + old.disconnect(Status.Removed); + } + return attache; + } + + @Override + public boolean maintenanceFailed(long hostId) { + HostVO host = _hostDao.findById(hostId); + if (host == null) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Cant not find host " + hostId); + } + return false; + } else { + 
return _hostDao.updateStatus(host, Event.UnableToMigrate, _nodeId); + } + } + + @Override + public void updateHost(long hostId, long guestOSCategoryId) { + GuestOSCategoryVO guestOSCategory = _guestOSCategoryDao + .findById(guestOSCategoryId); + Map hostDetails = _hostDetailsDao.findDetails(hostId); + + if (guestOSCategory != null) { + // Save a new entry for guest.os.category.id + hostDetails.put("guest.os.category.id", + String.valueOf(guestOSCategory.getId())); + } else { + // Delete any existing entry for guest.os.category.id + hostDetails.remove("guest.os.category.id"); + } + + _hostDetailsDao.persist(hostId, hostDetails); + + } + + protected void updateHost(final HostVO host, final StartupCommand startup, + final Host.Type type, final long msId) + throws IllegalArgumentException { + s_logger.debug("updateHost() called"); + + String dataCenter = startup.getDataCenter(); + String pod = startup.getPod(); + String cluster = startup.getCluster(); + + if (pod != null && dataCenter != null + && pod.equalsIgnoreCase("default") + && dataCenter.equalsIgnoreCase("default")) { + List pods = _podDao.listAll(); + for (HostPodVO hpv : pods) { + if (checkCIDR(type, hpv, startup.getPrivateIpAddress(), + startup.getPrivateNetmask())) { + pod = hpv.getName(); + dataCenter = _dcDao.findById(hpv.getDataCenterId()) + .getName(); + break; + } + } + } + long dcId = -1; + DataCenterVO dc = _dcDao.findByName(dataCenter); + if (dc == null) { + try { + dcId = Long.parseLong(dataCenter); + dc = _dcDao.findById(dcId); + } catch (final NumberFormatException e) { + } + } + if (dc == null) { + throw new IllegalArgumentException("Host " + + startup.getPrivateIpAddress() + + " sent incorrect data center: " + dataCenter); + } + dcId = dc.getId(); + + HostPodVO p = _podDao.findByName(pod, dcId); + if (p == null) { + try { + final long podId = Long.parseLong(pod); + p = _podDao.findById(podId); + } catch (final NumberFormatException e) { + } + } + Long podId = null; + if (p == null) { + if (type 
!= Host.Type.SecondaryStorage) { + + /* + * s_logger.info("Unable to find the pod so we are creating one." + * ); p = createPod(pod, dcId, startup.getPrivateIpAddress(), + * NetUtils.getCidrSize(startup.getPrivateNetmask())); podId = + * p.getId(); + */ + s_logger.error("Host " + startup.getPrivateIpAddress() + + " sent incorrect pod: " + pod + " in " + dataCenter); + throw new IllegalArgumentException("Host " + + startup.getPrivateIpAddress() + + " sent incorrect pod: " + pod + " in " + dataCenter); + } + } else { + podId = p.getId(); + } + + Long clusterId = null; + if (cluster != null) { + try { + clusterId = Long.valueOf(cluster); + } catch (NumberFormatException e) { + ClusterVO c = _clusterDao.findBy(cluster, podId); + if (c == null) { + c = new ClusterVO(dcId, podId, cluster); + c = _clusterDao.persist(c); + } + clusterId = c.getId(); + } + } + + if (type == Host.Type.Routing) { + StartupRoutingCommand scc = (StartupRoutingCommand) startup; + Hypervisor.Type hypervisorType = scc.getHypervisorType(); + boolean doCidrCheck = true; + + // If this command is from the agent simulator, don't do the CIDR + // check + if (scc.getAgentTag() != null + && startup.getAgentTag() + .equalsIgnoreCase("agent-simulator")) + doCidrCheck = false; + + // If this command is from a KVM agent, or from an agent that has a + // null hypervisor type, don't do the CIDR check + if (hypervisorType == null || hypervisorType == Hypervisor.Type.KVM) + doCidrCheck = false; + + if (doCidrCheck) + s_logger.info("Host: " + host.getName() + + " connected with hypervisor type: " + hypervisorType + + ". Checking CIDR..."); + else + s_logger.info("Host: " + host.getName() + + " connected with hypervisor type: " + hypervisorType + + ". 
Skipping CIDR check..."); + + if (doCidrCheck) { + checkCIDR(type, p, dc, scc.getPrivateIpAddress(), + scc.getPrivateNetmask()); + } + + // Check if the private/public IPs of the server are already in the + // private/public IP address tables + checkIPConflicts(type, p, dc, scc.getPrivateIpAddress(), + scc.getPublicIpAddress(), scc.getPublicIpAddress(), + scc.getPublicNetmask()); + } + + host.setDataCenterId(dc.getId()); + host.setPodId(podId); + host.setClusterId(clusterId); + host.setPrivateIpAddress(startup.getPrivateIpAddress()); + host.setPrivateNetmask(startup.getPrivateNetmask()); + host.setPrivateMacAddress(startup.getPrivateMacAddress()); + host.setPublicIpAddress(startup.getPublicIpAddress()); + host.setPublicMacAddress(startup.getPublicMacAddress()); + host.setPublicNetmask(startup.getPublicNetmask()); + host.setStorageIpAddress(startup.getStorageIpAddress()); + host.setStorageMacAddress(startup.getStorageMacAddress()); + host.setStorageNetmask(startup.getStorageNetmask()); + host.setVersion(startup.getVersion()); + host.setName(startup.getName()); + host.setType(type); + host.setManagementServerId(msId); + host.setStorageUrl(startup.getIqn()); + host.setLastPinged(System.currentTimeMillis() >> 10); + if (startup instanceof StartupRoutingCommand) { + final StartupRoutingCommand scc = (StartupRoutingCommand) startup; + host.setCaps(scc.getCapabilities()); + host.setCpus(scc.getCpus()); + host.setTotalMemory(scc.getMemory()); + host.setSpeed(scc.getSpeed()); + Hypervisor.Type hyType = scc.getHypervisorType(); + if (hyType == null) { + host.setHypervisorType(Hypervisor.Type.Xen); + } else { + host.setHypervisorType(hyType); + } + } else if (startup instanceof StartupStorageCommand) { + final StartupStorageCommand ssc = (StartupStorageCommand) startup; + host.setParent(ssc.getParent()); + host.setTotalSize(ssc.getTotalSize()); + host.setHypervisorType(Hypervisor.Type.None); + if (ssc.getNfsShare() != null) { + host.setStorageUrl(ssc.getNfsShare()); + } + } + 
if (startup.getStorageIpAddressDeux() != null) { + host.setStorageIpAddressDeux(startup.getStorageIpAddressDeux()); + host.setStorageMacAddressDeux(startup.getStorageMacAddressDeux()); + host.setStorageNetmaskDeux(startup.getStorageNetmaskDeux()); + } + + } + + // create capacity entries if none exist for this server + private void createCapacityEntry(final StartupCommand startup, HostVO server) { + SearchCriteria capacitySC = _capacityDao.createSearchCriteria(); + capacitySC.addAnd("hostOrPoolId", SearchCriteria.Op.EQ, server.getId()); + capacitySC.addAnd("dataCenterId", SearchCriteria.Op.EQ, + server.getDataCenterId()); + capacitySC.addAnd("podId", SearchCriteria.Op.EQ, server.getPodId()); + List capacities = _capacityDao.search(capacitySC, null); + + // remove old entries, we'll recalculate them anyway + if ((capacities != null) && !capacities.isEmpty()) { + for (CapacityVO capacity : capacities) { + if (capacity.getCapacityType() != CapacityVO.CAPACITY_TYPE_SECONDARY_STORAGE) { // Not + // allowing + // secondary + // storage + // to + // be + // deleted + // Bug# + // 7391 + _capacityDao.remove(capacity.getId()); + } + } + } + + if (startup instanceof StartupStorageCommand) { + StartupStorageCommand ssCmd = (StartupStorageCommand) startup; + if (ssCmd.getResourceType() == StorageResourceType.STORAGE_HOST) { + CapacityVO capacity = new CapacityVO(server.getId(), + server.getDataCenterId(), server.getPodId(), 0L, + server.getTotalSize() * _overProvisioningFactor, + CapacityVO.CAPACITY_TYPE_STORAGE_ALLOCATED); + _capacityDao.persist(capacity); + } + } else if (startup instanceof StartupRoutingCommand) { + + CapacityVO capacity = new CapacityVO(server.getId(), + server.getDataCenterId(), server.getPodId(), 0L, + server.getTotalMemory(), CapacityVO.CAPACITY_TYPE_MEMORY); + _capacityDao.persist(capacity); + + capacity = new CapacityVO( + server.getId(), + server.getDataCenterId(), + server.getPodId(), + 0L, + (long) (server.getCpus().longValue() + * 
server.getSpeed().longValue() * _cpuOverProvisioningFactor), + CapacityVO.CAPACITY_TYPE_CPU); + _capacityDao.persist(capacity); + } + } + + protected void upgradeAgent(final Link link, final byte[] request, + final String reason) { + + if (reason == UnsupportedVersionException.IncompatibleVersion) { + final UpgradeResponse response = new UpgradeResponse(request, + _upgradeMgr.getAgentUrl()); + try { + s_logger.info("Asking for the agent to update due to incompatible version: " + + response.toString()); + link.send(response.toBytes()); + } catch (final ClosedChannelException e) { + s_logger.warn("Unable to send response due to connection closed: " + + response.toString()); + } + return; + } + + assert (reason == UnsupportedVersionException.UnknownVersion) : "Unknown reason: " + + reason; + final UpgradeResponse response = new UpgradeResponse(request, + _upgradeMgr.getAgentUrl()); + try { + s_logger.info("Asking for the agent to update due to unknown version: " + + response.toString()); + link.send(response.toBytes()); + } catch (final ClosedChannelException e) { + s_logger.warn("Unable to send response due to connection closed: " + + response.toString()); + } + } + + protected class SimulateStartTask implements Runnable { + ServerResource resource; + Map details; + long id; + ActionDelegate actionDelegate; + + public SimulateStartTask(long id, ServerResource resource, + Map details, ActionDelegate actionDelegate) { + this.id = id; + this.resource = resource; + this.details = details; + this.actionDelegate = actionDelegate; + } + + @Override + public void run() { + try { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Simulating start for resource " + + resource.getName() + " id " + id); + } + simulateStart(id, resource, details, false, null); + } catch (Exception e) { + + s_logger.warn("Unable to simulate start on resource " + id + + " name " + resource.getName(), e); + } finally { + StackMaid.current().exitCleanup(); + } + } + } + + public class AgentHandler 
extends Task { + public AgentHandler(Task.Type type, Link link, byte[] data) { + super(type, link, data); + } + + protected void processRequest(final Link link, final Request request) { + AgentAttache attache = (AgentAttache) link.attachment(); + final Command[] cmds = request.getCommands(); + Command cmd = cmds[0]; + boolean logD = true; + + Response response = null; + if (attache == null) { + s_logger.debug("Processing sequence " + request.getSequence() + + ": Processing " + request.toString()); + if (!(cmd instanceof StartupCommand)) { + s_logger.warn("Throwing away a request because it came through as the first command on a connect: " + + request.toString()); + return; + } + StartupCommand startup = (StartupCommand) cmd; + if ((_upgradeMgr.registerForUpgrade(-1, startup.getVersion()) == UpgradeManager.State.RequiresUpdate) + && (_upgradeMgr.getAgentUrl() != null)) { + final UpgradeCommand upgrade = new UpgradeCommand( + _upgradeMgr.getAgentUrl()); + final Request req = new Request(1, -1, -1, + new Command[] { upgrade }, true, true); + s_logger.info("Agent requires upgrade: " + req.toString()); + try { + link.send(req.toBytes()); + } catch (ClosedChannelException e) { + s_logger.warn("Unable to tell agent it should update."); + } + return; + } + try { + StartupCommand[] startups = new StartupCommand[cmds.length]; + for (int i = 0; i < cmds.length; i++) + startups[i] = (StartupCommand) cmds[i]; + attache = handleConnect(link, startups); + } catch (final IllegalArgumentException e) { + _alertMgr.sendAlert( + AlertManager.ALERT_TYPE_HOST, + 0, + new Long(0), + "Agent from " + startup.getPrivateIpAddress() + + " is unable to connect due to " + + e.getMessage(), + "Agent from " + startup.getPrivateIpAddress() + + " is unable to connect with " + + request.toString() + " because of " + + e.getMessage()); + s_logger.warn("Unable to create attache for agent: " + + request.toString(), e); + response = new Response(request, new StartupAnswer( + (StartupCommand) cmd, 
e.getMessage()), _nodeId, -1); + } catch (final CloudRuntimeException e) { + _alertMgr.sendAlert( + AlertManager.ALERT_TYPE_HOST, + 0, + new Long(0), + "Agent from " + startup.getPrivateIpAddress() + + " is unable to connect due to " + + e.getMessage(), + "Agent from " + startup.getPrivateIpAddress() + + " is unable to connect with " + + request.toString() + " because of " + + e.getMessage()); + s_logger.warn("Unable to create attache for agent: " + + request.toString(), e); + } + if (attache == null) { + if (response == null) { + s_logger.warn("Unable to create attache for agent: " + + request.toString()); + response = new Response(request, new StartupAnswer( + (StartupCommand) request.getCommand(), + "Unable to register this agent"), _nodeId, -1); + } + try { + link.send(response.toBytes(), true); + } catch (final ClosedChannelException e) { + s_logger.warn("Response was not sent: " + + response.toString()); + } + return; + } + } + + final long hostId = attache.getId(); + + if (s_logger.isDebugEnabled()) { + if (cmd instanceof PingRoutingCommand) { + final PingRoutingCommand ping = (PingRoutingCommand) cmd; + if (ping.getNewStates().size() > 0) { + s_logger.debug("SeqA " + hostId + "-" + + request.getSequence() + ": Processing " + + request.toString()); + } else { + logD = false; + s_logger.debug("Ping from " + hostId); + s_logger.trace("SeqA " + hostId + "-" + + request.getSequence() + ": Processing " + + request.toString()); + } + } else if (cmd instanceof PingCommand) { + logD = false; + s_logger.debug("Ping from " + hostId); + s_logger.trace("SeqA " + attache.getId() + "-" + + request.getSequence() + ": Processing " + + request.toString()); + } else { + s_logger.debug("SeqA " + attache.getId() + "-" + + request.getSequence() + ": Processing " + + request.toString()); + } + } + + final Answer[] answers = new Answer[cmds.length]; + for (int i = 0; i < cmds.length; i++) { + cmd = cmds[i]; + Answer answer = null; + try { + if (cmd instanceof 
StartupRoutingCommand) { + final StartupRoutingCommand startup = (StartupRoutingCommand) cmd; + answer = new StartupAnswer(startup, attache.getId(), + getPingInterval()); + } else if (cmd instanceof StartupProxyCommand) { + final StartupProxyCommand startup = (StartupProxyCommand) cmd; + answer = new StartupAnswer(startup, attache.getId(), + getPingInterval()); + } else if (cmd instanceof StartupStorageCommand) { + final StartupStorageCommand startup = (StartupStorageCommand) cmd; + answer = new StartupAnswer(startup, attache.getId(), + getPingInterval()); + } else if (cmd instanceof ShutdownCommand) { + final ShutdownCommand shutdown = (ShutdownCommand) cmd; + final String reason = shutdown.getReason(); + s_logger.info("Host " + + attache.getId() + + " has informed us that it is shutting down with reason " + + reason + " and detail " + + shutdown.getDetail()); + if (reason.equals(ShutdownCommand.Update)) { + disconnect(attache, Event.UpdateNeeded, false); + } else if (reason.equals(ShutdownCommand.Requested)) { + disconnect(attache, Event.ShutdownRequested, false); + } + return; + } else if (cmd instanceof AgentControlCommand) { + answer = handleControlCommand(attache, + (AgentControlCommand) cmd); + } else { + handleCommands(attache, request.getSequence(), + new Command[] { cmd }); + if (cmd instanceof PingCommand) { + long cmdHostId = ((PingCommand) cmd).getHostId(); + + // if the router is sending a ping, verify the + // gateway was pingable + if (cmd instanceof PingRoutingCommand) { + boolean gatewayAccessible = ((PingRoutingCommand) cmd) + .isGatewayAccessible(); + HostVO host = _hostDao.findById(Long + .valueOf(cmdHostId)); + if (!gatewayAccessible) { + // alert that host lost connection to + // gateway (cannot ping the default route) + DataCenterVO dcVO = _dcDao.findById(host + .getDataCenterId()); + HostPodVO podVO = _podDao.findById(host + .getPodId()); + String hostDesc = "name: " + host.getName() + + " (id:" + host.getId() + + "), availability zone: " + 
+ dcVO.getName() + ", pod: "
+                                            + podVO.getName();
+
+                                    _alertMgr
+                                            .sendAlert(
+                                                    AlertManager.ALERT_TYPE_ROUTING,
+                                                    host.getDataCenterId(),
+                                                    host.getPodId(),
+                                                    "Host lost connection to gateway, "
+                                                            + hostDesc,
+                                                    "Host ["
+                                                            + hostDesc
+                                                            + "] lost connection to gateway (default route) and is possibly having network connection issues.");
+                                } else {
+                                    _alertMgr.clearAlert(
+                                            AlertManager.ALERT_TYPE_ROUTING,
+                                            host.getDataCenterId(),
+                                            host.getPodId());
+                                }
+                            }
+                            answer = new PingAnswer((PingCommand) cmd);
+                        } else if (cmd instanceof ReadyAnswer) {
+                            HostVO host = _hostDao.findById(attache.getId());
+                            if (host == null) {
+                                if (s_logger.isDebugEnabled()) {
+                                    s_logger.debug("Cant not find host "
+                                            + attache.getId());
+                                }
+                            } else {
+                                s_logger.info("Host "
+                                        + attache.getId()
+                                        + " is now ready to processing commands.");
+                                _hostDao.updateStatus(host, Event.Ready,
+                                        _nodeId);
+                            }
+                        } else {
+                            answer = new Answer(cmd);
+                        }
+                    }
+                } catch (final Throwable th) {
+                    s_logger.warn("Caught: ", th);
+                    answer = new Answer(cmd, false, th.getMessage());
+                }
+                answers[i] = answer;
+            }
+
+            response = new Response(request, answers, _nodeId, attache.getId());
+            if (s_logger.isDebugEnabled()) {
+                if (logD) {
+                    s_logger.debug("SeqA " + attache.getId() + "-"
+                            + response.getSequence() + ": Sending "
+                            + response.toString());
+                } else {
+                    s_logger.trace("SeqA " + attache.getId() + "-"
+                            + response.getSequence() + ": Sending "
+                            + response.toString());
+                }
+            }
+            try {
+                link.send(response.toBytes());
+            } catch (final ClosedChannelException e) {
+                s_logger.warn("Unable to send response because connection is closed: "
+                        + response.toString());
+            }
+        }
+
+        /**
+         * Passes a response received on a link to the attache bound to that link.
+         *
+         * If the link has no attachment, a warning is logged and the response is
+         * dropped. If the attache's {@code processAnswers} call returns false, an
+         * informational message is logged.
+         *
+         * @param link     connection the response arrived on; its attachment is
+         *                 cast to {@code AgentAttache}
+         * @param response response handed to {@code processAnswers} together with
+         *                 its sequence number
+         */
+        protected void processResponse(final Link link, final Response response) {
+            final AgentAttache attache = (AgentAttache) link.attachment();
+            if (attache == null) {
+                s_logger.warn("Unable to process: " + response.toString());
+                // Nothing to dispatch to. Returning here prevents the
+                // NullPointerException the dereference below would raise.
+                return;
+            }
+
+            if (!attache.processAnswers(response.getSequence(), response)) {
+                s_logger.info("Host " + attache.getId() + " - Seq "
+                        + response.getSequence()
+                        + ": Response is not processed: " + response.toString());
+            }
+        }
+
+        /**
+         * Handles one task according to its type.
+         *
+         * DATA: the payload is parsed with {@code Request.parse}; a
+         * {@code Response} goes to {@code processResponse}, anything else to
+         * {@code processRequest}. An {@code UnsupportedVersionException} is
+         * logged and answered with {@code upgradeAgent}.
+         * CONNECT: no action.
+         * DISCONNECT: if the link has an attache it is disconnected with
+         * {@code Event.AgentDisconnected}; otherwise the link is closed and
+         * marked terminated.
+         *
+         * A CLOUD_DB transaction is opened before dispatch; StackMaid cleanup
+         * runs and the transaction is closed when the task finishes, whether
+         * or not it succeeded.
+         *
+         * @param task task to handle
+         * @throws Exception whatever the dispatched handling or cleanup throws
+         */
+        @Override
+        protected void doTask(final Task task) throws Exception {
+            Transaction txn = Transaction.open(Transaction.CLOUD_DB);
+            try {
+                final Type type = task.getType();
+                if (type == Task.Type.DATA) {
+                    final byte[] data = task.getData();
+                    try {
+                        final Request event = Request.parse(data);
+                        if (event instanceof Response) {
+                            processResponse(task.getLink(), (Response) event);
+                        } else {
+                            processRequest(task.getLink(), event);
+                        }
+                    } catch (final UnsupportedVersionException e) {
+                        s_logger.warn(e.getMessage());
+                        upgradeAgent(task.getLink(), data, e.getReason());
+                    }
+                } else if (type == Task.Type.CONNECT) {
+                    // Intentionally empty: nothing is done on connect.
+                } else if (type == Task.Type.DISCONNECT) {
+                    final Link link = task.getLink();
+                    final AgentAttache attache = (AgentAttache) link
+                            .attachment();
+                    if (attache != null) {
+                        disconnect(attache, Event.AgentDisconnected, true);
+                    } else {
+                        s_logger.info("Connection from " + link.getIpAddress()
+                                + " closed but no cleanup was done.");
+                        link.close();
+                        link.terminated();
+                    }
+                }
+            } finally {
+                try {
+                    StackMaid.current().exitCleanup();
+                } finally {
+                    // Close the transaction even if the cleanup above throws.
+                    txn.close();
+                }
+            }
+        }
+    }
+
+    /** No-argument constructor with an empty body. */
+    protected AgentManagerImpl() {
+    }
 }
diff --git a/server/src/com/cloud/configuration/Config.java b/server/src/com/cloud/configuration/Config.java
index ddead082eae..bcfdc2b99bd 100644
--- a/server/src/com/cloud/configuration/Config.java
+++ b/server/src/com/cloud/configuration/Config.java
@@ -178,7 +178,8 @@ public enum Config {
     SSOKey("Hidden", ManagementServer.class, String.class, "security.singlesignon.key", null, "A Single Sign-On key used for logging into the cloud", null),
     SSOAuthTolerance("Advanced", ManagementServer.class, Long.class, "security.singlesignon.tolerance.millis", "300000", "The allowable clock difference in milliseconds between when an SSO login request is made and when it is received.", null),
     HashKey("Hidden", ManagementServer.class, String.class, "security.hash.key", null,
"for generic key-ed hash", null), - + DirectAgentLoadSize("Advanced", ManagementServer.class, Integer.class, "direct.agent.load.size", "16", "The number of direct agents to load each time", null), + DefaultPageSize("Advanced", ManagementServer.class, Integer.class, "default.page.size", "500", "Default page size for API list* commands", null); private final String _category;