From 4e595d04abedc1b11d9d50388ec289d8dc8fdc64 Mon Sep 17 00:00:00 2001 From: frank Date: Wed, 19 Oct 2011 17:51:20 -0700 Subject: [PATCH] Bug 11522 - New agent manager 1. Load hosts that are in maintenance mode, because maintenance is no longer an agent status. 2. Don't disconnect the agent when entering maintenance mode; again, it is no longer an agent status. --- build/developer.xml | 4 +- server/src/com/cloud/agent/AgentManager.java | 2 + .../cloud/agent/manager/AgentManagerImpl.java | 8 ++ .../src/com/cloud/host/dao/HostDaoImpl.java | 2 +- .../cloud/resource/ResourceManagerImpl.java | 4 +- .../src/com/cloud/server/StatsCollector.java | 5 +- .../resourceMgrTest/testDeploy100Host.py | 78 ++++++++++++++++++- 7 files changed, 94 insertions(+), 9 deletions(-) diff --git a/build/developer.xml b/build/developer.xml index e6959ba6a7d..387d62cb971 100755 --- a/build/developer.xml +++ b/build/developer.xml @@ -242,11 +242,11 @@ - + - + diff --git a/server/src/com/cloud/agent/AgentManager.java b/server/src/com/cloud/agent/AgentManager.java index ed48d8cd527..88d5b70544f 100755 --- a/server/src/com/cloud/agent/AgentManager.java +++ b/server/src/com/cloud/agent/AgentManager.java @@ -215,6 +215,8 @@ public interface AgentManager extends Manager { public boolean disconnectAgent(HostVO host, Status.Event e, long msId); public void pullAgentToMaintenance(long hostId); + + public void pullAgentOutMaintenance(long hostId); boolean reconnect(long hostId); } diff --git a/server/src/com/cloud/agent/manager/AgentManagerImpl.java b/server/src/com/cloud/agent/manager/AgentManagerImpl.java index 9dc9d3023b4..4a8af6b4fd9 100755 --- a/server/src/com/cloud/agent/manager/AgentManagerImpl.java +++ b/server/src/com/cloud/agent/manager/AgentManagerImpl.java @@ -1710,4 +1710,12 @@ public class AgentManagerImpl implements AgentManager, HandlerFactory, Manager { attache.cancelAllCommands(Status.Disconnected, false); } } + + @Override + public void pullAgentOutMaintenance(long hostId) { + AgentAttache 
attache = findAttache(hostId); + if (attache != null) { + attache.setMaintenanceMode(false); + } + } } diff --git a/server/src/com/cloud/host/dao/HostDaoImpl.java b/server/src/com/cloud/host/dao/HostDaoImpl.java index 87c76c47661..de24c6a848d 100755 --- a/server/src/com/cloud/host/dao/HostDaoImpl.java +++ b/server/src/com/cloud/host/dao/HostDaoImpl.java @@ -326,7 +326,7 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao txn.start(); SearchCriteria sc = UnmanagedDirectConnectSearch.create(); sc.setParameters("lastPinged", lastPingSecondsAfter); - sc.setParameters("resourceStates", ResourceState.ErrorInMaintenance, ResourceState.Maintenance, ResourceState.PrepareForMaintenance, ResourceState.Disabled); + //sc.setParameters("resourceStates", ResourceState.ErrorInMaintenance, ResourceState.Maintenance, ResourceState.PrepareForMaintenance, ResourceState.Disabled); sc.setJoinParameters("ClusterManagedSearch", "managed", Managed.ManagedState.Managed); List hosts = lockRows(sc, new Filter(HostVO.class, "clusterId", true, 0L, limit), true); diff --git a/server/src/com/cloud/resource/ResourceManagerImpl.java b/server/src/com/cloud/resource/ResourceManagerImpl.java index bcc6428dd10..522260f7a0e 100755 --- a/server/src/com/cloud/resource/ResourceManagerImpl.java +++ b/server/src/com/cloud/resource/ResourceManagerImpl.java @@ -975,7 +975,7 @@ public class ResourceManagerImpl implements ResourceManager, ResourceService, Ma return (_agentMgr.reconnect(hostId) ? 
host : null); } - + @Override public boolean resourceStateTransitTo(Host host, ResourceState.Event event, long msId) throws NoTransitionException { ResourceState currentState = host.getResourceState(); @@ -1674,7 +1674,7 @@ public class ResourceManagerImpl implements ResourceManager, ResourceService, Ma try { resourceStateTransitTo(host, ResourceState.Event.AdminCancelMaintenance, _nodeId); - _agentMgr.disconnectWithoutInvestigation(hostId, Status.Event.ResetRequested); + _agentMgr.pullAgentOutMaintenance(hostId); return true; } catch (NoTransitionException e) { s_logger.debug("Cannot transmit host " + host.getId() + "to Enabled state", e); diff --git a/server/src/com/cloud/server/StatsCollector.java b/server/src/com/cloud/server/StatsCollector.java index 958f154bd5d..e7f27be330a 100755 --- a/server/src/com/cloud/server/StatsCollector.java +++ b/server/src/com/cloud/server/StatsCollector.java @@ -46,6 +46,7 @@ import com.cloud.host.HostStats; import com.cloud.host.HostVO; import com.cloud.host.Status; import com.cloud.host.dao.HostDao; +import com.cloud.resource.ResourceState; import com.cloud.storage.StorageManager; import com.cloud.storage.StoragePoolHostVO; import com.cloud.storage.StoragePoolVO; @@ -156,7 +157,8 @@ public class StatsCollector { s_logger.debug("HostStatsCollector is running..."); SearchCriteria sc = _hostDao.createSearchCriteria(); - sc.addAnd("status", SearchCriteria.Op.EQ, Status.Up.toString()); + sc.addAnd("status", SearchCriteria.Op.EQ, Status.Up.toString()); + sc.addAnd("resourceState", SearchCriteria.Op.NIN, ResourceState.Maintenance, ResourceState.PrepareForMaintenance, ResourceState.ErrorInMaintenance); sc.addAnd("type", SearchCriteria.Op.NEQ, Host.Type.Storage.toString()); sc.addAnd("type", SearchCriteria.Op.NEQ, Host.Type.ConsoleProxy.toString()); sc.addAnd("type", SearchCriteria.Op.NEQ, Host.Type.SecondaryStorage.toString()); @@ -196,6 +198,7 @@ public class StatsCollector { SearchCriteria sc = _hostDao.createSearchCriteria(); 
sc.addAnd("status", SearchCriteria.Op.EQ, Status.Up.toString()); + sc.addAnd("resourceState", SearchCriteria.Op.NIN, ResourceState.Maintenance, ResourceState.PrepareForMaintenance, ResourceState.ErrorInMaintenance); sc.addAnd("type", SearchCriteria.Op.NEQ, Host.Type.Storage.toString()); sc.addAnd("type", SearchCriteria.Op.NEQ, Host.Type.ConsoleProxy.toString()); sc.addAnd("type", SearchCriteria.Op.NEQ, Host.Type.SecondaryStorage.toString()); diff --git a/tools/testClient/resourceMgrTest/testDeploy100Host.py b/tools/testClient/resourceMgrTest/testDeploy100Host.py index fbb3f8e808c..6d3b1ccaf0a 100755 --- a/tools/testClient/resourceMgrTest/testDeploy100Host.py +++ b/tools/testClient/resourceMgrTest/testDeploy100Host.py @@ -6,17 +6,89 @@ Created on Oct 18, 2011 from cloudstackTestCase import * from cloudstackAPI import * import uuid +import threading +import random +import time + +class Task(threading.Thread): + def __init__(self, func, param=None): + super(Task, self).__init__() + self.func = func + self.param = param + + def run(self): + self.func(self.param) + + def doTask(self): + self.start() class TestDeploy100Hosts(cloudstackTestCase): - def test_deploy100Hosts(self): + hosts = [] + def deployHost(self, url): apiClient = self.testClient.getApiClient() addHostCmd = addHost.addHostCmd() addHostCmd.hypervisor = "simulator" addHostCmd.clusterid = 1 addHostCmd.zoneid = 1 addHostCmd.podid = 1 - addHostCmd.url = "http://sim/10.223.63.1" + addHostCmd.url = "http://sim/%s"%url addHostCmd.username = "placeholder" addHostCmd.password = "placeholder" addHostResponce = apiClient.addHost(addHostCmd) - return addHostResponce.id \ No newline at end of file + return addHostResponce[0].id + + def randomCancelMaintenance(self): + def run(param): + while(1): + try: + interval = random.randint(1, 2) + time.sleep(interval) + if len(self.hosts) == 0: + continue + + index = random.randint(0, len(self.hosts)-1) + hostId = self.hosts[index] + apiClient = self.testClient.getApiClient() + 
cMaintainCmd = cancelHostMaintenance.cancelHostMaintenanceCmd() + cMaintainCmd.id = hostId + response = apiClient.cancelHostMaintenance(cMaintainCmd) + id = response.id + print "Host %s cancelled maintenance mode" % id + except Exception, e: + print e + + t = Task(run) + t.doTask() + + def randomEnterMaintenance(self): + def run(param): + while(1): + try: + interval = random.randint(1, 2) + time.sleep(interval) + if len(self.hosts) == 0: + continue + index = random.randint(0, len(self.hosts)-1) + hostId = self.hosts[index] + apiClient = self.testClient.getApiClient() + maintainCmd = prepareHostForMaintenance.prepareHostForMaintenanceCmd() + maintainCmd.id = hostId + response = apiClient.prepareHostForMaintenance(maintainCmd) + id = response.id + print "Host %s entered maintenance mode" % id + except Exception, e: + print e + + t = Task(run) + t.doTask() + + + def test_deploy100Hosts(self): + #for i in range(200): + #self.hosts.append(self.deployHost(i)) + for i in range(200): + self.hosts.append(i) + self.randomEnterMaintenance() + self.randomCancelMaintenance() + while(1): time.sleep(10000) +