mirror of https://github.com/apache/cloudstack.git
MS maintenance improvements (#10417)
* Update last agents during ms maintenance, and some code improvements
* Send 503 (Service Unavailable) response status when maintenance or shutdown is initiated
[Any load balancer in the clustered environment can avoid routing requests to this MS node]
* Migrate systemvm agents before routing host agents, and some code improvements
* Added events for ms maintenance and shutdown operations
* Added the following ms maintenance and shutdown improvements
- block new agent connections during prepare for maintenance of ms
- maintain avoids ms list
- propagate updated management servers list and lb algorithm in host and indirect.agent.lb.algorithm settings respectively, to systemvm (non-routing) agents
- updated setup ms list and migrate agent connections to executor service
- migrate agent connection through executor, and send the answer to the ms host that initiated the migration
- re-initialize ssl handshake executor if it is shutdown
- don't allow prepare for maintenance or shutdown when other management server nodes are in preparing states
- don't allow trigger shutdown when management server is up and other management server nodes are in preparing states
- stop agent connections monitor on ms maintenance
- update avoid ms list in ready command
- updated connected host from the client connection
- update last agents in ms metrics from the database
- updated some agent config descriptions
- update last management server in the hosts during shutdown
- added agents and lastagents in management server response
- updated management server maintenance & shutdown unit tests
- some code improvements
* refactored code / addressed comments
* removed shutdown testcase (maybe, calling System.exit)
* Revert "removed shutdown testcase (maybe, calling System.exit)"
This reverts commit e14b071715.
* avoid system.exit during shutdown test
* code improvements
* testcase fix
* Fix cutoff time in agent connections monitor thread
This commit is contained in:
parent
ea36568e47
commit
9dceae4614
|
|
@ -342,7 +342,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||||
logger.info("Attempted to connect to the server, but received an unexpected exception, trying again...", e);
|
logger.info("Attempted to connect to the server, but received an unexpected exception, trying again...", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
shell.updateConnectedHost();
|
shell.updateConnectedHost(((NioClient)connection).getHost());
|
||||||
scavengeOldAgentObjects();
|
scavengeOldAgentObjects();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -617,15 +617,11 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void reconnect(final Link link) {
|
protected void reconnect(final Link link) {
|
||||||
reconnect(link, null, null, false);
|
reconnect(link, null, false);
|
||||||
}
|
}
|
||||||
|
|
||||||
protected void reconnect(final Link link, String preferredHost, List<String> avoidHostList, boolean forTransfer) {
|
protected void reconnect(final Link link, String preferredMSHost, boolean forTransfer) {
|
||||||
if (!(forTransfer || reconnectAllowed)) {
|
if (!(forTransfer || reconnectAllowed)) {
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (!reconnectAllowed) {
|
|
||||||
logger.debug("Reconnect requested but it is not allowed {}", () -> getLinkLog(link));
|
logger.debug("Reconnect requested but it is not allowed {}", () -> getLinkLog(link));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
@ -637,19 +633,26 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||||
serverResource.disconnected();
|
serverResource.disconnected();
|
||||||
logger.info("Lost connection to host: {}. Attempting reconnection while we still have {} commands in progress.", shell.getConnectedHost(), commandsInProgress.get());
|
logger.info("Lost connection to host: {}. Attempting reconnection while we still have {} commands in progress.", shell.getConnectedHost(), commandsInProgress.get());
|
||||||
stopAndCleanupConnection(true);
|
stopAndCleanupConnection(true);
|
||||||
|
String host = preferredMSHost;
|
||||||
|
if (org.apache.commons.lang3.StringUtils.isBlank(host)) {
|
||||||
|
host = shell.getNextHost();
|
||||||
|
}
|
||||||
|
List<String> avoidMSHostList = shell.getAvoidHosts();
|
||||||
do {
|
do {
|
||||||
final String host = shell.getNextHost();
|
if (CollectionUtils.isEmpty(avoidMSHostList) || !avoidMSHostList.contains(host)) {
|
||||||
connection = new NioClient(getAgentName(), host, shell.getPort(), shell.getWorkers(), shell.getSslHandshakeTimeout(), this);
|
connection = new NioClient(getAgentName(), host, shell.getPort(), shell.getWorkers(), shell.getSslHandshakeTimeout(), this);
|
||||||
logger.info("Reconnecting to host: {}", host);
|
logger.info("Reconnecting to host: {}", host);
|
||||||
try {
|
try {
|
||||||
connection.start();
|
connection.start();
|
||||||
} catch (final NioConnectionException e) {
|
} catch (final NioConnectionException e) {
|
||||||
logger.info("Attempted to re-connect to the server, but received an unexpected exception, trying again...", e);
|
logger.info("Attempted to re-connect to the server, but received an unexpected exception, trying again...", e);
|
||||||
stopAndCleanupConnection(false);
|
stopAndCleanupConnection(false);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
shell.getBackoffAlgorithm().waitBeforeRetry();
|
shell.getBackoffAlgorithm().waitBeforeRetry();
|
||||||
|
host = shell.getNextHost();
|
||||||
} while (!connection.isStartup());
|
} while (!connection.isStartup());
|
||||||
shell.updateConnectedHost();
|
shell.updateConnectedHost(((NioClient)connection).getHost());
|
||||||
logger.info("Connected to the host: {}", shell.getConnectedHost());
|
logger.info("Connected to the host: {}", shell.getConnectedHost());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -922,7 +925,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||||
return new SetupCertificateAnswer(true);
|
return new SetupCertificateAnswer(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
private void processManagementServerList(final List<String> msList, final String lbAlgorithm, final Long lbCheckInterval) {
|
private void processManagementServerList(final List<String> msList, final List<String> avoidMsList, final String lbAlgorithm, final Long lbCheckInterval) {
|
||||||
if (CollectionUtils.isNotEmpty(msList) && StringUtils.isNotEmpty(lbAlgorithm)) {
|
if (CollectionUtils.isNotEmpty(msList) && StringUtils.isNotEmpty(lbAlgorithm)) {
|
||||||
try {
|
try {
|
||||||
final String newMSHosts = String.format("%s%s%s", com.cloud.utils.StringUtils.toCSVList(msList), IAgentShell.hostLbAlgorithmSeparator, lbAlgorithm);
|
final String newMSHosts = String.format("%s%s%s", com.cloud.utils.StringUtils.toCSVList(msList), IAgentShell.hostLbAlgorithmSeparator, lbAlgorithm);
|
||||||
|
|
@ -934,6 +937,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||||
throw new CloudRuntimeException("Could not persist received management servers list", e);
|
throw new CloudRuntimeException("Could not persist received management servers list", e);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
shell.setAvoidHosts(avoidMsList);
|
||||||
if ("shuffle".equals(lbAlgorithm)) {
|
if ("shuffle".equals(lbAlgorithm)) {
|
||||||
scheduleHostLBCheckerTask(0);
|
scheduleHostLBCheckerTask(0);
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -942,16 +946,18 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||||
}
|
}
|
||||||
|
|
||||||
private Answer setupManagementServerList(final SetupMSListCommand cmd) {
|
private Answer setupManagementServerList(final SetupMSListCommand cmd) {
|
||||||
processManagementServerList(cmd.getMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval());
|
processManagementServerList(cmd.getMsList(), cmd.getAvoidMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval());
|
||||||
return new SetupMSListAnswer(true);
|
return new SetupMSListAnswer(true);
|
||||||
}
|
}
|
||||||
|
|
||||||
private Answer migrateAgentToOtherMS(final MigrateAgentConnectionCommand cmd) {
|
private Answer migrateAgentToOtherMS(final MigrateAgentConnectionCommand cmd) {
|
||||||
try {
|
try {
|
||||||
if (CollectionUtils.isNotEmpty(cmd.getMsList())) {
|
if (CollectionUtils.isNotEmpty(cmd.getMsList())) {
|
||||||
processManagementServerList(cmd.getMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval());
|
processManagementServerList(cmd.getMsList(), cmd.getAvoidMsList(), cmd.getLbAlgorithm(), cmd.getLbCheckInterval());
|
||||||
}
|
}
|
||||||
migrateAgentConnection(cmd.getAvoidMsList());
|
Executors.newSingleThreadScheduledExecutor(new NamedThreadFactory("MigrateAgentConnection-Job")).schedule(() -> {
|
||||||
|
migrateAgentConnection(cmd.getAvoidMsList());
|
||||||
|
}, 3, TimeUnit.SECONDS);
|
||||||
} catch (Exception e) {
|
} catch (Exception e) {
|
||||||
String errMsg = "Migrate agent connection failed, due to " + e.getMessage();
|
String errMsg = "Migrate agent connection failed, due to " + e.getMessage();
|
||||||
logger.debug(errMsg, e);
|
logger.debug(errMsg, e);
|
||||||
|
|
@ -972,25 +978,26 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||||
throw new CloudRuntimeException("No other Management Server hosts to migrate");
|
throw new CloudRuntimeException("No other Management Server hosts to migrate");
|
||||||
}
|
}
|
||||||
|
|
||||||
String preferredHost = null;
|
String preferredMSHost = null;
|
||||||
for (String msHost : msHostsList) {
|
for (String msHost : msHostsList) {
|
||||||
try (final Socket socket = new Socket()) {
|
try (final Socket socket = new Socket()) {
|
||||||
socket.connect(new InetSocketAddress(msHost, shell.getPort()), 5000);
|
socket.connect(new InetSocketAddress(msHost, shell.getPort()), 5000);
|
||||||
preferredHost = msHost;
|
preferredMSHost = msHost;
|
||||||
break;
|
break;
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
throw new CloudRuntimeException("Management server host: " + msHost + " is not reachable, to migrate connection");
|
throw new CloudRuntimeException("Management server host: " + msHost + " is not reachable, to migrate connection");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if (preferredHost == null) {
|
if (preferredMSHost == null) {
|
||||||
throw new CloudRuntimeException("Management server host(s) are not reachable, to migrate connection");
|
throw new CloudRuntimeException("Management server host(s) are not reachable, to migrate connection");
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.debug("Management server host " + preferredHost + " is found to be reachable, trying to reconnect");
|
logger.debug("Management server host " + preferredMSHost + " is found to be reachable, trying to reconnect");
|
||||||
shell.resetHostCounter();
|
shell.resetHostCounter();
|
||||||
|
shell.setAvoidHosts(avoidMsList);
|
||||||
shell.setConnectionTransfer(true);
|
shell.setConnectionTransfer(true);
|
||||||
reconnect(link, preferredHost, avoidMsList, true);
|
reconnect(link, preferredMSHost, true);
|
||||||
}
|
}
|
||||||
|
|
||||||
public void processResponse(final Response response, final Link link) {
|
public void processResponse(final Response response, final Link link) {
|
||||||
|
|
@ -1003,14 +1010,21 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||||
for (final IAgentControlListener listener : controlListeners) {
|
for (final IAgentControlListener listener : controlListeners) {
|
||||||
listener.processControlResponse(response, (AgentControlAnswer)answer);
|
listener.processControlResponse(response, (AgentControlAnswer)answer);
|
||||||
}
|
}
|
||||||
} else if (answer instanceof PingAnswer && (((PingAnswer) answer).isSendStartup()) && reconnectAllowed) {
|
} else if (answer instanceof PingAnswer) {
|
||||||
logger.info("Management server requested startup command to reinitialize the agent");
|
processPingAnswer((PingAnswer) answer);
|
||||||
sendStartup(link);
|
|
||||||
} else {
|
} else {
|
||||||
updateLastPingResponseTime();
|
updateLastPingResponseTime();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void processPingAnswer(final PingAnswer answer) {
|
||||||
|
if ((answer.isSendStartup()) && reconnectAllowed) {
|
||||||
|
logger.info("Management server requested startup command to reinitialize the agent");
|
||||||
|
sendStartup(link);
|
||||||
|
}
|
||||||
|
shell.setAvoidHosts(answer.getAvoidMsList());
|
||||||
|
}
|
||||||
|
|
||||||
public void processReadyCommand(final Command cmd) {
|
public void processReadyCommand(final Command cmd) {
|
||||||
final ReadyCommand ready = (ReadyCommand)cmd;
|
final ReadyCommand ready = (ReadyCommand)cmd;
|
||||||
// Set human readable sizes;
|
// Set human readable sizes;
|
||||||
|
|
@ -1027,7 +1041,7 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||||
}
|
}
|
||||||
|
|
||||||
verifyAgentArch(ready.getArch());
|
verifyAgentArch(ready.getArch());
|
||||||
processManagementServerList(ready.getMsHostList(), ready.getLbAlgorithm(), ready.getLbCheckInterval());
|
processManagementServerList(ready.getMsHostList(), ready.getAvoidMsHostList(), ready.getLbAlgorithm(), ready.getLbCheckInterval());
|
||||||
|
|
||||||
logger.info("Ready command is processed for agent [id: {}, uuid: {}, name: {}]", getId(), getUuid(), getName());
|
logger.info("Ready command is processed for agent [id: {}, uuid: {}, name: {}]", getId(), getUuid(), getName());
|
||||||
}
|
}
|
||||||
|
|
@ -1374,26 +1388,26 @@ public class Agent implements HandlerFactory, IAgentControl, AgentStatusUpdater
|
||||||
if (msList == null || msList.length < 1) {
|
if (msList == null || msList.length < 1) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
final String preferredHost = msList[0];
|
final String preferredMSHost = msList[0];
|
||||||
final String connectedHost = shell.getConnectedHost();
|
final String connectedHost = shell.getConnectedHost();
|
||||||
logger.debug("Running preferred host checker task, connected host={}, preferred host={}",
|
logger.debug("Running preferred host checker task, connected host={}, preferred host={}",
|
||||||
connectedHost, preferredHost);
|
connectedHost, preferredMSHost);
|
||||||
if (preferredHost == null || preferredHost.equals(connectedHost) || link == null) {
|
if (preferredMSHost == null || preferredMSHost.equals(connectedHost) || link == null) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
boolean isHostUp = false;
|
boolean isHostUp = false;
|
||||||
try (final Socket socket = new Socket()) {
|
try (final Socket socket = new Socket()) {
|
||||||
socket.connect(new InetSocketAddress(preferredHost, shell.getPort()), 5000);
|
socket.connect(new InetSocketAddress(preferredMSHost, shell.getPort()), 5000);
|
||||||
isHostUp = true;
|
isHostUp = true;
|
||||||
} catch (final IOException e) {
|
} catch (final IOException e) {
|
||||||
logger.debug("Host: {} is not reachable", preferredHost);
|
logger.debug("Host: {} is not reachable", preferredMSHost);
|
||||||
}
|
}
|
||||||
if (isHostUp && link != null && commandsInProgress.get() == 0) {
|
if (isHostUp && link != null && commandsInProgress.get() == 0) {
|
||||||
if (logger.isDebugEnabled()) {
|
if (logger.isDebugEnabled()) {
|
||||||
logger.debug("Preferred host {} is found to be reachable, trying to reconnect", preferredHost);
|
logger.debug("Preferred host {} is found to be reachable, trying to reconnect", preferredMSHost);
|
||||||
}
|
}
|
||||||
shell.resetHostCounter();
|
shell.resetHostCounter();
|
||||||
reconnect(link);
|
reconnect(link, preferredMSHost, false);
|
||||||
}
|
}
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
logger.error("Error caught while attempting to connect to preferred host", t);
|
logger.error("Error caught while attempting to connect to preferred host", t);
|
||||||
|
|
|
||||||
|
|
@ -66,6 +66,7 @@ public class AgentShell implements IAgentShell, Daemon {
|
||||||
private String _zone;
|
private String _zone;
|
||||||
private String _pod;
|
private String _pod;
|
||||||
private String _host;
|
private String _host;
|
||||||
|
private List<String> _avoidHosts;
|
||||||
private String _privateIp;
|
private String _privateIp;
|
||||||
private int _port;
|
private int _port;
|
||||||
private int _proxyPort;
|
private int _proxyPort;
|
||||||
|
|
@ -76,7 +77,6 @@ public class AgentShell implements IAgentShell, Daemon {
|
||||||
private volatile boolean _exit = false;
|
private volatile boolean _exit = false;
|
||||||
private int _pingRetries;
|
private int _pingRetries;
|
||||||
private final List<Agent> _agents = new ArrayList<Agent>();
|
private final List<Agent> _agents = new ArrayList<Agent>();
|
||||||
private String hostToConnect;
|
|
||||||
private String connectedHost;
|
private String connectedHost;
|
||||||
private Long preferredHostCheckInterval;
|
private Long preferredHostCheckInterval;
|
||||||
private boolean connectionTransfer = false;
|
private boolean connectionTransfer = false;
|
||||||
|
|
@ -121,7 +121,7 @@ public class AgentShell implements IAgentShell, Daemon {
|
||||||
if (_hostCounter >= hosts.length) {
|
if (_hostCounter >= hosts.length) {
|
||||||
_hostCounter = 0;
|
_hostCounter = 0;
|
||||||
}
|
}
|
||||||
hostToConnect = hosts[_hostCounter % hosts.length];
|
String hostToConnect = hosts[_hostCounter % hosts.length];
|
||||||
_hostCounter++;
|
_hostCounter++;
|
||||||
return hostToConnect;
|
return hostToConnect;
|
||||||
}
|
}
|
||||||
|
|
@ -143,11 +143,10 @@ public class AgentShell implements IAgentShell, Daemon {
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void updateConnectedHost() {
|
public void updateConnectedHost(String connectedHost) {
|
||||||
connectedHost = hostToConnect;
|
this.connectedHost = connectedHost;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void resetHostCounter() {
|
public void resetHostCounter() {
|
||||||
_hostCounter = 0;
|
_hostCounter = 0;
|
||||||
|
|
@ -166,6 +165,16 @@ public class AgentShell implements IAgentShell, Daemon {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void setAvoidHosts(List<String> avoidHosts) {
|
||||||
|
_avoidHosts = avoidHosts;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> getAvoidHosts() {
|
||||||
|
return _avoidHosts;
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public String getPrivateIp() {
|
public String getPrivateIp() {
|
||||||
return _privateIp;
|
return _privateIp;
|
||||||
|
|
|
||||||
|
|
@ -16,6 +16,7 @@
|
||||||
// under the License.
|
// under the License.
|
||||||
package com.cloud.agent;
|
package com.cloud.agent;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.Properties;
|
import java.util.Properties;
|
||||||
|
|
||||||
|
|
@ -63,9 +64,13 @@ public interface IAgentShell {
|
||||||
|
|
||||||
String[] getHosts();
|
String[] getHosts();
|
||||||
|
|
||||||
|
void setAvoidHosts(List<String> hosts);
|
||||||
|
|
||||||
|
List<String> getAvoidHosts();
|
||||||
|
|
||||||
long getLbCheckerInterval(Long receivedLbInterval);
|
long getLbCheckerInterval(Long receivedLbInterval);
|
||||||
|
|
||||||
void updateConnectedHost();
|
void updateConnectedHost(String connectedHost);
|
||||||
|
|
||||||
String getConnectedHost();
|
String getConnectedHost();
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -816,7 +816,7 @@ public class AgentProperties{
|
||||||
* Data type: Integer.<br>
|
* Data type: Integer.<br>
|
||||||
* Default value: <code>null</code>
|
* Default value: <code>null</code>
|
||||||
*/
|
*/
|
||||||
public static final Property<Integer> SSL_HANDSHAKE_TIMEOUT = new Property<>("ssl.handshake.timeout", null, Integer.class);
|
public static final Property<Integer> SSL_HANDSHAKE_TIMEOUT = new Property<>("ssl.handshake.timeout", 30, Integer.class);
|
||||||
|
|
||||||
public static class Property <T>{
|
public static class Property <T>{
|
||||||
private String name;
|
private String name;
|
||||||
|
|
|
||||||
|
|
@ -358,7 +358,7 @@ public class AgentShellTest {
|
||||||
AgentShell shell = new AgentShell();
|
AgentShell shell = new AgentShell();
|
||||||
shell.setHosts("test");
|
shell.setHosts("test");
|
||||||
shell.getNextHost();
|
shell.getNextHost();
|
||||||
shell.updateConnectedHost();
|
shell.updateConnectedHost("test");
|
||||||
|
|
||||||
Assert.assertEquals(expected, shell.getConnectedHost());
|
Assert.assertEquals(expected, shell.getConnectedHost());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -739,6 +739,13 @@ public class EventTypes {
|
||||||
//Purge resources
|
//Purge resources
|
||||||
public static final String EVENT_PURGE_EXPUNGED_RESOURCES = "PURGE.EXPUNGED.RESOURCES";
|
public static final String EVENT_PURGE_EXPUNGED_RESOURCES = "PURGE.EXPUNGED.RESOURCES";
|
||||||
|
|
||||||
|
// Management Server
|
||||||
|
public static final String EVENT_MS_MAINTENANCE_PREPARE = "MS.MAINTENANCE.PREPARE";
|
||||||
|
public static final String EVENT_MS_MAINTENANCE_CANCEL = "MS.MAINTENANCE.CANCEL";
|
||||||
|
public static final String EVENT_MS_SHUTDOWN_PREPARE = "MS.SHUTDOWN.PREPARE";
|
||||||
|
public static final String EVENT_MS_SHUTDOWN_CANCEL = "MS.SHUTDOWN.CANCEL";
|
||||||
|
public static final String EVENT_MS_SHUTDOWN = "MS.SHUTDOWN";
|
||||||
|
|
||||||
// OBJECT STORE
|
// OBJECT STORE
|
||||||
public static final String EVENT_OBJECT_STORE_CREATE = "OBJECT.STORE.CREATE";
|
public static final String EVENT_OBJECT_STORE_CREATE = "OBJECT.STORE.CREATE";
|
||||||
public static final String EVENT_OBJECT_STORE_DELETE = "OBJECT.STORE.DELETE";
|
public static final String EVENT_OBJECT_STORE_DELETE = "OBJECT.STORE.DELETE";
|
||||||
|
|
@ -1233,6 +1240,12 @@ public class EventTypes {
|
||||||
entityEventDetails.put(EVENT_UPDATE_IMAGE_STORE_ACCESS_STATE, ImageStore.class);
|
entityEventDetails.put(EVENT_UPDATE_IMAGE_STORE_ACCESS_STATE, ImageStore.class);
|
||||||
entityEventDetails.put(EVENT_LIVE_PATCH_SYSTEMVM, "SystemVMs");
|
entityEventDetails.put(EVENT_LIVE_PATCH_SYSTEMVM, "SystemVMs");
|
||||||
|
|
||||||
|
entityEventDetails.put(EVENT_MS_MAINTENANCE_PREPARE, "ManagementServer");
|
||||||
|
entityEventDetails.put(EVENT_MS_MAINTENANCE_CANCEL, "ManagementServer");
|
||||||
|
entityEventDetails.put(EVENT_MS_SHUTDOWN_PREPARE, "ManagementServer");
|
||||||
|
entityEventDetails.put(EVENT_MS_SHUTDOWN_CANCEL, "ManagementServer");
|
||||||
|
entityEventDetails.put(EVENT_MS_SHUTDOWN, "ManagementServer");
|
||||||
|
|
||||||
//Object Store
|
//Object Store
|
||||||
entityEventDetails.put(EVENT_OBJECT_STORE_CREATE, ObjectStore.class);
|
entityEventDetails.put(EVENT_OBJECT_STORE_CREATE, ObjectStore.class);
|
||||||
entityEventDetails.put(EVENT_OBJECT_STORE_UPDATE, ObjectStore.class);
|
entityEventDetails.put(EVENT_OBJECT_STORE_UPDATE, ObjectStore.class);
|
||||||
|
|
|
||||||
|
|
@ -1150,6 +1150,7 @@ public class ApiConstants {
|
||||||
public static final String PENDING_JOBS_COUNT = "pendingjobscount";
|
public static final String PENDING_JOBS_COUNT = "pendingjobscount";
|
||||||
public static final String AGENTS_COUNT = "agentscount";
|
public static final String AGENTS_COUNT = "agentscount";
|
||||||
public static final String AGENTS = "agents";
|
public static final String AGENTS = "agents";
|
||||||
|
public static final String LAST_AGENTS = "lastagents";
|
||||||
|
|
||||||
public static final String PUBLIC_MTU = "publicmtu";
|
public static final String PUBLIC_MTU = "publicmtu";
|
||||||
public static final String PRIVATE_MTU = "privatemtu";
|
public static final String PRIVATE_MTU = "privatemtu";
|
||||||
|
|
|
||||||
|
|
@ -30,6 +30,7 @@ public enum ApiErrorCode {
|
||||||
UNSUPPORTED_ACTION_ERROR(432),
|
UNSUPPORTED_ACTION_ERROR(432),
|
||||||
API_LIMIT_EXCEED(429),
|
API_LIMIT_EXCEED(429),
|
||||||
|
|
||||||
|
SERVICE_UNAVAILABLE(503),
|
||||||
INTERNAL_ERROR(530),
|
INTERNAL_ERROR(530),
|
||||||
ACCOUNT_ERROR(531),
|
ACCOUNT_ERROR(531),
|
||||||
ACCOUNT_RESOURCE_LIMIT_ERROR(532),
|
ACCOUNT_RESOURCE_LIMIT_ERROR(532),
|
||||||
|
|
|
||||||
|
|
@ -82,6 +82,14 @@ public class ManagementServerResponse extends BaseResponse {
|
||||||
@Param(description = "the Management Server Peers")
|
@Param(description = "the Management Server Peers")
|
||||||
private List<PeerManagementServerNodeResponse> peers;
|
private List<PeerManagementServerNodeResponse> peers;
|
||||||
|
|
||||||
|
@SerializedName(ApiConstants.LAST_AGENTS)
|
||||||
|
@Param(description = "the last agents this Management Server is responsible for, before shutdown or preparing for maintenance", since = "4.21.0.0")
|
||||||
|
private List<String> lastAgents;
|
||||||
|
|
||||||
|
@SerializedName(ApiConstants.AGENTS)
|
||||||
|
@Param(description = "the agents this Management Server is responsible for", since = "4.21.0.0")
|
||||||
|
private List<String> agents;
|
||||||
|
|
||||||
@SerializedName(ApiConstants.AGENTS_COUNT)
|
@SerializedName(ApiConstants.AGENTS_COUNT)
|
||||||
@Param(description = "the number of host agents this Management Server is responsible for", since = "4.21.0.0")
|
@Param(description = "the number of host agents this Management Server is responsible for", since = "4.21.0.0")
|
||||||
private Long agentsCount;
|
private Long agentsCount;
|
||||||
|
|
@ -134,6 +142,14 @@ public class ManagementServerResponse extends BaseResponse {
|
||||||
return ipAddress;
|
return ipAddress;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<String> getLastAgents() {
|
||||||
|
return lastAgents;
|
||||||
|
}
|
||||||
|
|
||||||
|
public List<String> getAgents() {
|
||||||
|
return agents;
|
||||||
|
}
|
||||||
|
|
||||||
public Long getAgentsCount() {
|
public Long getAgentsCount() {
|
||||||
return this.agentsCount;
|
return this.agentsCount;
|
||||||
}
|
}
|
||||||
|
|
@ -190,6 +206,14 @@ public class ManagementServerResponse extends BaseResponse {
|
||||||
this.ipAddress = ipAddress;
|
this.ipAddress = ipAddress;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void setLastAgents(List<String> lastAgents) {
|
||||||
|
this.lastAgents = lastAgents;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAgents(List<String> agents) {
|
||||||
|
this.agents = agents;
|
||||||
|
}
|
||||||
|
|
||||||
public void setAgentsCount(Long agentsCount) {
|
public void setAgentsCount(Long agentsCount) {
|
||||||
this.agentsCount = agentsCount;
|
this.agentsCount = agentsCount;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -19,18 +19,22 @@
|
||||||
|
|
||||||
package com.cloud.agent.api;
|
package com.cloud.agent.api;
|
||||||
|
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
public class PingAnswer extends Answer {
|
public class PingAnswer extends Answer {
|
||||||
private PingCommand _command = null;
|
private PingCommand _command = null;
|
||||||
|
|
||||||
private boolean sendStartup = false;
|
private boolean sendStartup = false;
|
||||||
|
private List<String> avoidMsList;
|
||||||
|
|
||||||
protected PingAnswer() {
|
protected PingAnswer() {
|
||||||
}
|
}
|
||||||
|
|
||||||
public PingAnswer(PingCommand cmd, boolean sendStartup) {
|
public PingAnswer(PingCommand cmd, List<String> avoidMsList, boolean sendStartup) {
|
||||||
super(cmd);
|
super(cmd);
|
||||||
_command = cmd;
|
_command = cmd;
|
||||||
this.sendStartup = sendStartup;
|
this.sendStartup = sendStartup;
|
||||||
|
this.avoidMsList = avoidMsList;
|
||||||
}
|
}
|
||||||
|
|
||||||
public PingCommand getCommand() {
|
public PingCommand getCommand() {
|
||||||
|
|
@ -44,4 +48,8 @@ public class PingAnswer extends Answer {
|
||||||
public void setSendStartup(boolean sendStartup) {
|
public void setSendStartup(boolean sendStartup) {
|
||||||
this.sendStartup = sendStartup;
|
this.sendStartup = sendStartup;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<String> getAvoidMsList() {
|
||||||
|
return avoidMsList;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -35,6 +35,7 @@ public class ReadyCommand extends Command {
|
||||||
private String hostUuid;
|
private String hostUuid;
|
||||||
private String hostName;
|
private String hostName;
|
||||||
private List<String> msHostList;
|
private List<String> msHostList;
|
||||||
|
private List<String> avoidMsHostList;
|
||||||
private String lbAlgorithm;
|
private String lbAlgorithm;
|
||||||
private Long lbCheckInterval;
|
private Long lbCheckInterval;
|
||||||
private Boolean enableHumanReadableSizes;
|
private Boolean enableHumanReadableSizes;
|
||||||
|
|
@ -90,6 +91,14 @@ public class ReadyCommand extends Command {
|
||||||
this.msHostList = msHostList;
|
this.msHostList = msHostList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<String> getAvoidMsHostList() {
|
||||||
|
return avoidMsHostList;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void setAvoidMsHostList(List<String> msHostList) {
|
||||||
|
this.avoidMsHostList = avoidMsHostList;
|
||||||
|
}
|
||||||
|
|
||||||
public String getLbAlgorithm() {
|
public String getLbAlgorithm() {
|
||||||
return lbAlgorithm;
|
return lbAlgorithm;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -26,12 +26,14 @@ import com.cloud.agent.api.Command;
|
||||||
public class SetupMSListCommand extends Command {
|
public class SetupMSListCommand extends Command {
|
||||||
|
|
||||||
private List<String> msList;
|
private List<String> msList;
|
||||||
|
private List<String> avoidMsList;
|
||||||
private String lbAlgorithm;
|
private String lbAlgorithm;
|
||||||
private Long lbCheckInterval;
|
private Long lbCheckInterval;
|
||||||
|
|
||||||
public SetupMSListCommand(final List<String> msList, final String lbAlgorithm, final Long lbCheckInterval) {
|
public SetupMSListCommand(final List<String> msList, final List<String> avoidMsList, final String lbAlgorithm, final Long lbCheckInterval) {
|
||||||
super();
|
super();
|
||||||
this.msList = msList;
|
this.msList = msList;
|
||||||
|
this.avoidMsList = avoidMsList;
|
||||||
this.lbAlgorithm = lbAlgorithm;
|
this.lbAlgorithm = lbAlgorithm;
|
||||||
this.lbCheckInterval = lbCheckInterval;
|
this.lbCheckInterval = lbCheckInterval;
|
||||||
}
|
}
|
||||||
|
|
@ -40,6 +42,10 @@ public class SetupMSListCommand extends Command {
|
||||||
return msList;
|
return msList;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public List<String> getAvoidMsList() {
|
||||||
|
return avoidMsList;
|
||||||
|
}
|
||||||
|
|
||||||
public String getLbAlgorithm() {
|
public String getLbAlgorithm() {
|
||||||
return lbAlgorithm;
|
return lbAlgorithm;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -16,7 +16,6 @@
|
||||||
// under the License.
|
// under the License.
|
||||||
package com.cloud.agent;
|
package com.cloud.agent;
|
||||||
|
|
||||||
import java.util.List;
|
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
|
|
||||||
import org.apache.cloudstack.framework.config.ConfigKey;
|
import org.apache.cloudstack.framework.config.ConfigKey;
|
||||||
|
|
@ -173,8 +172,4 @@ public interface AgentManager {
|
||||||
void propagateChangeToAgents(Map<String, String> params);
|
void propagateChangeToAgents(Map<String, String> params);
|
||||||
|
|
||||||
boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs);
|
boolean transferDirectAgentsFromMS(String fromMsUuid, long fromMsId, long timeoutDurationInMs);
|
||||||
|
|
||||||
List<String> getLastAgents();
|
|
||||||
|
|
||||||
void setLastAgents(List<String> lastAgents);
|
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -214,13 +214,13 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||||
|
|
||||||
protected final ConfigKey<Integer> Workers = new ConfigKey<>("Advanced", Integer.class, "workers", "5",
|
protected final ConfigKey<Integer> Workers = new ConfigKey<>("Advanced", Integer.class, "workers", "5",
|
||||||
"Number of worker threads handling remote agent connections.", false);
|
"Number of worker threads handling remote agent connections.", false);
|
||||||
protected final ConfigKey<Integer> Port = new ConfigKey<>("Advanced", Integer.class, "port", "8250", "Port to listen on for remote agent connections.", false);
|
protected final ConfigKey<Integer> Port = new ConfigKey<>("Advanced", Integer.class, "port", "8250", "Port to listen on for remote (indirect) agent connections.", false);
|
||||||
protected final ConfigKey<Integer> RemoteAgentSslHandshakeTimeout = new ConfigKey<>("Advanced",
|
protected final ConfigKey<Integer> RemoteAgentSslHandshakeTimeout = new ConfigKey<>("Advanced",
|
||||||
Integer.class, "agent.ssl.handshake.timeout", "30",
|
Integer.class, "agent.ssl.handshake.timeout", "30",
|
||||||
"Seconds after which SSL handshake times out during remote agent connections.", false);
|
"Seconds after which SSL handshake times out during remote (indirect) agent connections.", false);
|
||||||
protected final ConfigKey<Integer> RemoteAgentMaxConcurrentNewConnections = new ConfigKey<>("Advanced",
|
protected final ConfigKey<Integer> RemoteAgentMaxConcurrentNewConnections = new ConfigKey<>("Advanced",
|
||||||
Integer.class, "agent.max.concurrent.new.connections", "0",
|
Integer.class, "agent.max.concurrent.new.connections", "0",
|
||||||
"Number of maximum concurrent new connections server allows for remote agents. " +
|
"Number of maximum concurrent new connections server allows for remote (indirect) agents. " +
|
||||||
"If set to zero (default value) then no limit will be enforced on concurrent new connections",
|
"If set to zero (default value) then no limit will be enforced on concurrent new connections",
|
||||||
false);
|
false);
|
||||||
protected final ConfigKey<Integer> AlertWait = new ConfigKey<>("Advanced", Integer.class, "alert.wait", "1800",
|
protected final ConfigKey<Integer> AlertWait = new ConfigKey<>("Advanced", Integer.class, "alert.wait", "1800",
|
||||||
|
|
@ -255,9 +255,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||||
|
|
||||||
_executor = new ThreadPoolExecutor(agentTaskThreads, agentTaskThreads, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<>(), new NamedThreadFactory("AgentTaskPool"));
|
_executor = new ThreadPoolExecutor(agentTaskThreads, agentTaskThreads, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<>(), new NamedThreadFactory("AgentTaskPool"));
|
||||||
|
|
||||||
_connectExecutor = new ThreadPoolExecutor(100, 500, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<>(), new NamedThreadFactory("AgentConnectTaskPool"));
|
initConnectExecutor();
|
||||||
// allow core threads to time out even when there are no items in the queue
|
|
||||||
_connectExecutor.allowCoreThreadTimeOut(true);
|
|
||||||
|
|
||||||
maxConcurrentNewAgentConnections = RemoteAgentMaxConcurrentNewConnections.value();
|
maxConcurrentNewAgentConnections = RemoteAgentMaxConcurrentNewConnections.value();
|
||||||
|
|
||||||
|
|
@ -273,10 +271,6 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||||
logger.debug("Created DirectAgentAttache pool with size: {}.", directAgentPoolSize);
|
logger.debug("Created DirectAgentAttache pool with size: {}.", directAgentPoolSize);
|
||||||
_directAgentThreadCap = Math.round(directAgentPoolSize * DirectAgentThreadCap.value()) + 1; // add 1 to always make the value > 0
|
_directAgentThreadCap = Math.round(directAgentPoolSize * DirectAgentThreadCap.value()) + 1; // add 1 to always make the value > 0
|
||||||
|
|
||||||
_monitorExecutor = new ScheduledThreadPoolExecutor(1, new NamedThreadFactory("AgentMonitor"));
|
|
||||||
|
|
||||||
newAgentConnectionsMonitor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("NewAgentConnectionsMonitor"));
|
|
||||||
|
|
||||||
initializeCommandTimeouts();
|
initializeCommandTimeouts();
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -351,10 +345,27 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||||
_hostMonitors.remove(id);
|
_hostMonitors.remove(id);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onManagementServerPreparingForMaintenance() {
|
||||||
|
logger.debug("Management server preparing for maintenance");
|
||||||
|
if (_connection != null) {
|
||||||
|
_connection.block();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onManagementServerCancelPreparingForMaintenance() {
|
||||||
|
logger.debug("Management server cancel preparing for maintenance");
|
||||||
|
if (_connection != null) {
|
||||||
|
_connection.unblock();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onManagementServerMaintenance() {
|
public void onManagementServerMaintenance() {
|
||||||
logger.debug("Management server maintenance enabled");
|
logger.debug("Management server maintenance enabled");
|
||||||
_monitorExecutor.shutdownNow();
|
_monitorExecutor.shutdownNow();
|
||||||
|
newAgentConnectionsMonitor.shutdownNow();
|
||||||
if (_connection != null) {
|
if (_connection != null) {
|
||||||
_connection.stop();
|
_connection.stop();
|
||||||
|
|
||||||
|
|
@ -371,10 +382,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||||
public void onManagementServerCancelMaintenance() {
|
public void onManagementServerCancelMaintenance() {
|
||||||
logger.debug("Management server maintenance disabled");
|
logger.debug("Management server maintenance disabled");
|
||||||
if (_connectExecutor.isShutdown()) {
|
if (_connectExecutor.isShutdown()) {
|
||||||
_connectExecutor = new ThreadPoolExecutor(100, 500, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<>(), new NamedThreadFactory("AgentConnectTaskPool"));
|
initConnectExecutor();
|
||||||
_connectExecutor.allowCoreThreadTimeOut(true);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
startDirectlyConnectedHosts(true);
|
startDirectlyConnectedHosts(true);
|
||||||
if (_connection != null) {
|
if (_connection != null) {
|
||||||
try {
|
try {
|
||||||
|
|
@ -385,9 +394,28 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||||
}
|
}
|
||||||
|
|
||||||
if (_monitorExecutor.isShutdown()) {
|
if (_monitorExecutor.isShutdown()) {
|
||||||
_monitorExecutor = new ScheduledThreadPoolExecutor(1, new NamedThreadFactory("AgentMonitor"));
|
initAndScheduleMonitorExecutor();
|
||||||
_monitorExecutor.scheduleWithFixedDelay(new MonitorTask(), mgmtServiceConf.getPingInterval(), mgmtServiceConf.getPingInterval(), TimeUnit.SECONDS);
|
|
||||||
}
|
}
|
||||||
|
if (newAgentConnectionsMonitor.isShutdown()) {
|
||||||
|
initAndScheduleAgentConnectionsMonitor();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initConnectExecutor() {
|
||||||
|
_connectExecutor = new ThreadPoolExecutor(100, 500, 60L, TimeUnit.SECONDS, new LinkedBlockingQueue<>(), new NamedThreadFactory("AgentConnectTaskPool"));
|
||||||
|
// allow core threads to time out even when there are no items in the queue
|
||||||
|
_connectExecutor.allowCoreThreadTimeOut(true);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initAndScheduleMonitorExecutor() {
|
||||||
|
_monitorExecutor = new ScheduledThreadPoolExecutor(1, new NamedThreadFactory("AgentMonitor"));
|
||||||
|
_monitorExecutor.scheduleWithFixedDelay(new MonitorTask(), mgmtServiceConf.getPingInterval(), mgmtServiceConf.getPingInterval(), TimeUnit.SECONDS);
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initAndScheduleAgentConnectionsMonitor() {
|
||||||
|
final int cleanupTimeInSecs = Wait.value();
|
||||||
|
newAgentConnectionsMonitor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("NewAgentConnectionsMonitor"));
|
||||||
|
newAgentConnectionsMonitor.scheduleAtFixedRate(new AgentNewConnectionsMonitorTask(), cleanupTimeInSecs, cleanupTimeInSecs, TimeUnit.SECONDS);
|
||||||
}
|
}
|
||||||
|
|
||||||
private AgentControlAnswer handleControlCommand(final AgentAttache attache, final AgentControlCommand cmd) {
|
private AgentControlAnswer handleControlCommand(final AgentAttache attache, final AgentControlCommand cmd) {
|
||||||
|
|
@ -426,16 +454,6 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||||
return attache;
|
return attache;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
|
||||||
public List<String> getLastAgents() {
|
|
||||||
return lastAgents;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
|
||||||
public void setLastAgents(List<String> lastAgents) {
|
|
||||||
this.lastAgents = lastAgents;
|
|
||||||
}
|
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public Answer sendTo(final Long dcId, final HypervisorType type, final Command cmd) {
|
public Answer sendTo(final Long dcId, final HypervisorType type, final Command cmd) {
|
||||||
final List<ClusterVO> clusters = _clusterDao.listByDcHyType(dcId, type.toString());
|
final List<ClusterVO> clusters = _clusterDao.listByDcHyType(dcId, type.toString());
|
||||||
|
|
@ -779,6 +797,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||||
ManagementServerHostVO msHost = _mshostDao.findByMsid(_nodeId);
|
ManagementServerHostVO msHost = _mshostDao.findByMsid(_nodeId);
|
||||||
if (msHost != null && (ManagementServerHost.State.Maintenance.equals(msHost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(msHost.getState()))) {
|
if (msHost != null && (ManagementServerHost.State.Maintenance.equals(msHost.getState()) || ManagementServerHost.State.PreparingForMaintenance.equals(msHost.getState()))) {
|
||||||
_monitorExecutor.shutdownNow();
|
_monitorExecutor.shutdownNow();
|
||||||
|
newAgentConnectionsMonitor.shutdownNow();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -792,12 +811,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
_monitorExecutor.scheduleWithFixedDelay(new MonitorTask(), mgmtServiceConf.getPingInterval(), mgmtServiceConf.getPingInterval(), TimeUnit.SECONDS);
|
initAndScheduleMonitorExecutor();
|
||||||
|
initAndScheduleAgentConnectionsMonitor();
|
||||||
final int cleanupTime = Wait.value();
|
|
||||||
newAgentConnectionsMonitor.scheduleAtFixedRate(new AgentNewConnectionsMonitorTask(), cleanupTime,
|
|
||||||
cleanupTime, TimeUnit.MINUTES);
|
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1304,6 +1319,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||||
if (!indirectAgentLB.compareManagementServerList(host.getId(), host.getDataCenterId(), agentMSHostList, lbAlgorithm)) {
|
if (!indirectAgentLB.compareManagementServerList(host.getId(), host.getDataCenterId(), agentMSHostList, lbAlgorithm)) {
|
||||||
final List<String> newMSList = indirectAgentLB.getManagementServerList(host.getId(), host.getDataCenterId(), null);
|
final List<String> newMSList = indirectAgentLB.getManagementServerList(host.getId(), host.getDataCenterId(), null);
|
||||||
ready.setMsHostList(newMSList);
|
ready.setMsHostList(newMSList);
|
||||||
|
final List<String> avoidMsList = _mshostDao.listNonUpStateMsIPs();
|
||||||
|
ready.setAvoidMsHostList(avoidMsList);
|
||||||
ready.setLbAlgorithm(indirectAgentLB.getLBAlgorithmName());
|
ready.setLbAlgorithm(indirectAgentLB.getLBAlgorithmName());
|
||||||
ready.setLbCheckInterval(indirectAgentLB.getLBPreferredHostCheckInterval(host.getClusterId()));
|
ready.setLbCheckInterval(indirectAgentLB.getLBPreferredHostCheckInterval(host.getClusterId()));
|
||||||
logger.debug("Agent's management server host list is not up to date, sending list update: {}", newMSList);
|
logger.debug("Agent's management server host list is not up to date, sending list update: {}", newMSList);
|
||||||
|
|
@ -1608,7 +1625,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||||
if (host!= null && host.getStatus() != Status.Up && gatewayAccessible) {
|
if (host!= null && host.getStatus() != Status.Up && gatewayAccessible) {
|
||||||
requestStartupCommand = true;
|
requestStartupCommand = true;
|
||||||
}
|
}
|
||||||
answer = new PingAnswer((PingCommand)cmd, requestStartupCommand);
|
final List<String> avoidMsList = _mshostDao.listNonUpStateMsIPs();
|
||||||
|
answer = new PingAnswer((PingCommand)cmd, avoidMsList, requestStartupCommand);
|
||||||
} else if (cmd instanceof ReadyAnswer) {
|
} else if (cmd instanceof ReadyAnswer) {
|
||||||
final HostVO host = _hostDao.findById(attache.getId());
|
final HostVO host = _hostDao.findById(attache.getId());
|
||||||
if (host == null) {
|
if (host == null) {
|
||||||
|
|
@ -1929,25 +1947,19 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
|
||||||
logger.trace("Agent New Connections Monitor is started.");
|
logger.trace("Agent New Connections Monitor is started.");
|
||||||
final int cleanupTime = Wait.value();
|
final int cleanupTime = Wait.value();
|
||||||
Set<Map.Entry<String, Long>> entrySet = newAgentConnections.entrySet();
|
Set<Map.Entry<String, Long>> entrySet = newAgentConnections.entrySet();
|
||||||
long cutOff = System.currentTimeMillis() - (cleanupTime * 60 * 1000L);
|
long cutOff = System.currentTimeMillis() - (cleanupTime * 1000L);
|
||||||
if (logger.isDebugEnabled()) {
|
List<String> expiredConnections = newAgentConnections.entrySet()
|
||||||
List<String> expiredConnections = newAgentConnections.entrySet()
|
.stream()
|
||||||
.stream()
|
.filter(e -> e.getValue() <= cutOff)
|
||||||
.filter(e -> e.getValue() <= cutOff)
|
.map(Map.Entry::getKey)
|
||||||
.map(Map.Entry::getKey)
|
.collect(Collectors.toList());
|
||||||
.collect(Collectors.toList());
|
logger.debug("Currently {} active new connections, of which {} have expired - {}",
|
||||||
logger.debug("Currently {} active new connections, of which {} have expired - {}",
|
entrySet.size(),
|
||||||
entrySet.size(),
|
expiredConnections.size(),
|
||||||
expiredConnections.size(),
|
StringUtils.join(expiredConnections));
|
||||||
StringUtils.join(expiredConnections));
|
for (String connection : expiredConnections) {
|
||||||
}
|
logger.trace("Cleaning up new agent connection for {}", connection);
|
||||||
for (Map.Entry<String, Long> entry : entrySet) {
|
newAgentConnections.remove(connection);
|
||||||
if (entry.getValue() <= cutOff) {
|
|
||||||
if (logger.isTraceEnabled()) {
|
|
||||||
logger.trace("Cleaning up new agent connection for {}", entry.getKey());
|
|
||||||
}
|
|
||||||
newAgentConnections.remove(entry.getKey());
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -151,11 +151,11 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||||
super();
|
super();
|
||||||
}
|
}
|
||||||
|
|
||||||
protected final ConfigKey<Boolean> EnableLB = new ConfigKey<>(Boolean.class, "agent.lb.enabled", "Advanced", "false", "Enable agent load balancing between management server nodes", true);
|
protected final ConfigKey<Boolean> EnableLB = new ConfigKey<>(Boolean.class, "agent.lb.enabled", "Advanced", "false", "Enable direct agents load balancing between management server nodes", true);
|
||||||
protected final ConfigKey<Double> ConnectedAgentThreshold = new ConfigKey<>(Double.class, "agent.load.threshold", "Advanced", "0.7",
|
protected final ConfigKey<Double> ConnectedAgentThreshold = new ConfigKey<>(Double.class, "agent.load.threshold", "Advanced", "0.7",
|
||||||
"What percentage of the agents can be held by one management server before load balancing happens", true, EnableLB.key());
|
"What percentage of the direct agents can be held by one management server before load balancing happens", true, EnableLB.key());
|
||||||
protected final ConfigKey<Integer> LoadSize = new ConfigKey<>(Integer.class, "direct.agent.load.size", "Advanced", "16", "How many agents to connect to in each round", true);
|
protected final ConfigKey<Integer> LoadSize = new ConfigKey<>(Integer.class, "direct.agent.load.size", "Advanced", "16", "How many direct agents to connect to in each round", true);
|
||||||
protected final ConfigKey<Integer> ScanInterval = new ConfigKey<>(Integer.class, "direct.agent.scan.interval", "Advanced", "90", "Interval between scans to load agents", false,
|
protected final ConfigKey<Integer> ScanInterval = new ConfigKey<>(Integer.class, "direct.agent.scan.interval", "Advanced", "90", "Interval between scans to load direct agents", false,
|
||||||
ConfigKey.Scope.Global, 1000);
|
ConfigKey.Scope.Global, 1000);
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -1395,7 +1395,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
long transferStartTime = System.currentTimeMillis();
|
long transferStartTimeInMs = System.currentTimeMillis();
|
||||||
if (CollectionUtils.isEmpty(getDirectAgentHosts(fromMsId))) {
|
if (CollectionUtils.isEmpty(getDirectAgentHosts(fromMsId))) {
|
||||||
logger.info("No direct agent hosts available on management server node {} (id: {}), to transfer", fromMsId, fromMsUuid);
|
logger.info("No direct agent hosts available on management server node {} (id: {}), to transfer", fromMsId, fromMsUuid);
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -1417,7 +1417,7 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||||
}
|
}
|
||||||
logger.debug("Transferring {} direct agents from management server node {} (id: {}) of zone {}", directAgentHostsInDc.size(), fromMsId, fromMsUuid, dc);
|
logger.debug("Transferring {} direct agents from management server node {} (id: {}) of zone {}", directAgentHostsInDc.size(), fromMsId, fromMsUuid, dc);
|
||||||
for (HostVO host : directAgentHostsInDc) {
|
for (HostVO host : directAgentHostsInDc) {
|
||||||
long transferElapsedTimeInMs = System.currentTimeMillis() - transferStartTime;
|
long transferElapsedTimeInMs = System.currentTimeMillis() - transferStartTimeInMs;
|
||||||
if (transferElapsedTimeInMs >= timeoutDurationInMs) {
|
if (transferElapsedTimeInMs >= timeoutDurationInMs) {
|
||||||
logger.debug("Stop transferring remaining direct agents from management server node {} (id: {}), timed out", fromMsId, fromMsUuid);
|
logger.debug("Stop transferring remaining direct agents from management server node {} (id: {}), timed out", fromMsId, fromMsUuid);
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -1486,6 +1486,18 @@ public class ClusteredAgentManagerImpl extends AgentManagerImpl implements Clust
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onManagementServerPreparingForMaintenance() {
|
||||||
|
logger.debug("Management server preparing for maintenance");
|
||||||
|
super.onManagementServerPreparingForMaintenance();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onManagementServerCancelPreparingForMaintenance() {
|
||||||
|
logger.debug("Management server cancel preparing for maintenance");
|
||||||
|
super.onManagementServerPreparingForMaintenance();
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onManagementServerMaintenance() {
|
public void onManagementServerMaintenance() {
|
||||||
logger.debug("Management server maintenance enabled");
|
logger.debug("Management server maintenance enabled");
|
||||||
|
|
|
||||||
|
|
@ -21,7 +21,7 @@ import org.apache.cloudstack.framework.config.Configurable;
|
||||||
|
|
||||||
public interface ManagementServiceConfiguration extends Configurable {
|
public interface ManagementServiceConfiguration extends Configurable {
|
||||||
ConfigKey<Integer> PingInterval = new ConfigKey<Integer>("Advanced", Integer.class, "ping.interval", "60",
|
ConfigKey<Integer> PingInterval = new ConfigKey<Integer>("Advanced", Integer.class, "ping.interval", "60",
|
||||||
"Interval to send application level pings to make sure the connection is still working", false);
|
"Interval in seconds to send application level pings to make sure the connection is still working", false);
|
||||||
ConfigKey<Float> PingTimeout = new ConfigKey<Float>("Advanced", Float.class, "ping.timeout", "2.5",
|
ConfigKey<Float> PingTimeout = new ConfigKey<Float>("Advanced", Float.class, "ping.timeout", "2.5",
|
||||||
"Multiplier to ping.interval before announcing an agent has timed out", true);
|
"Multiplier to ping.interval before announcing an agent has timed out", true);
|
||||||
public int getPingInterval();
|
public int getPingInterval();
|
||||||
|
|
|
||||||
|
|
@ -183,6 +183,13 @@ public interface HostDao extends GenericDao<HostVO, Long>, StateDao<Status, Stat
|
||||||
*/
|
*/
|
||||||
List<String> listByMs(long msId);
|
List<String> listByMs(long msId);
|
||||||
|
|
||||||
|
/**
|
||||||
|
* Retrieves the last host ids/agents this {@see ManagementServer} has responsibility over.
|
||||||
|
* @param msId the id of the {@see ManagementServer}
|
||||||
|
* @return the last host ids/agents this {@see ManagementServer} has responsibility over
|
||||||
|
*/
|
||||||
|
List<String> listByLastMs(long msId);
|
||||||
|
|
||||||
/**
|
/**
|
||||||
* Retrieves the hypervisor versions of the hosts in the datacenter which are in Up state in ascending order
|
* Retrieves the hypervisor versions of the hosts in the datacenter which are in Up state in ascending order
|
||||||
* @param datacenterId data center id
|
* @param datacenterId data center id
|
||||||
|
|
@ -200,7 +207,7 @@ public interface HostDao extends GenericDao<HostVO, Long>, StateDao<Status, Stat
|
||||||
boolean isHostUp(long hostId);
|
boolean isHostUp(long hostId);
|
||||||
|
|
||||||
List<Long> findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(final Long zoneId, final Long clusterId,
|
List<Long> findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(final Long zoneId, final Long clusterId,
|
||||||
final List<ResourceState> resourceStates, final List<Type> types,
|
final Long msId, final List<ResourceState> resourceStates, final List<Type> types,
|
||||||
final List<Hypervisor.HypervisorType> hypervisorTypes);
|
final List<Hypervisor.HypervisorType> hypervisorTypes);
|
||||||
|
|
||||||
List<HypervisorType> listDistinctHypervisorTypes(final Long zoneId);
|
List<HypervisorType> listDistinctHypervisorTypes(final Long zoneId);
|
||||||
|
|
|
||||||
|
|
@ -129,6 +129,7 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
|
||||||
protected SearchBuilder<HostVO> ResponsibleMsSearch;
|
protected SearchBuilder<HostVO> ResponsibleMsSearch;
|
||||||
protected SearchBuilder<HostVO> ResponsibleMsDcSearch;
|
protected SearchBuilder<HostVO> ResponsibleMsDcSearch;
|
||||||
protected GenericSearchBuilder<HostVO, String> ResponsibleMsIdSearch;
|
protected GenericSearchBuilder<HostVO, String> ResponsibleMsIdSearch;
|
||||||
|
protected GenericSearchBuilder<HostVO, String> LastMsIdSearch;
|
||||||
protected SearchBuilder<HostVO> HostTypeClusterCountSearch;
|
protected SearchBuilder<HostVO> HostTypeClusterCountSearch;
|
||||||
protected SearchBuilder<HostVO> HostTypeZoneCountSearch;
|
protected SearchBuilder<HostVO> HostTypeZoneCountSearch;
|
||||||
protected SearchBuilder<HostVO> ClusterStatusSearch;
|
protected SearchBuilder<HostVO> ClusterStatusSearch;
|
||||||
|
|
@ -209,6 +210,11 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
|
||||||
ResponsibleMsIdSearch.and("managementServerId", ResponsibleMsIdSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ);
|
ResponsibleMsIdSearch.and("managementServerId", ResponsibleMsIdSearch.entity().getManagementServerId(), SearchCriteria.Op.EQ);
|
||||||
ResponsibleMsIdSearch.done();
|
ResponsibleMsIdSearch.done();
|
||||||
|
|
||||||
|
LastMsIdSearch = createSearchBuilder(String.class);
|
||||||
|
LastMsIdSearch.selectFields(LastMsIdSearch.entity().getUuid());
|
||||||
|
LastMsIdSearch.and("lastManagementServerId", LastMsIdSearch.entity().getLastManagementServerId(), SearchCriteria.Op.EQ);
|
||||||
|
LastMsIdSearch.done();
|
||||||
|
|
||||||
HostTypeClusterCountSearch = createSearchBuilder();
|
HostTypeClusterCountSearch = createSearchBuilder();
|
||||||
HostTypeClusterCountSearch.and("cluster", HostTypeClusterCountSearch.entity().getClusterId(), SearchCriteria.Op.EQ);
|
HostTypeClusterCountSearch.and("cluster", HostTypeClusterCountSearch.entity().getClusterId(), SearchCriteria.Op.EQ);
|
||||||
HostTypeClusterCountSearch.and("type", HostTypeClusterCountSearch.entity().getType(), SearchCriteria.Op.EQ);
|
HostTypeClusterCountSearch.and("type", HostTypeClusterCountSearch.entity().getType(), SearchCriteria.Op.EQ);
|
||||||
|
|
@ -1569,6 +1575,13 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
|
||||||
return customSearch(sc, null);
|
return customSearch(sc, null);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public List<String> listByLastMs(long msId) {
|
||||||
|
SearchCriteria<String> sc = LastMsIdSearch.create();
|
||||||
|
sc.addAnd("lastManagementServerId", SearchCriteria.Op.EQ, msId);
|
||||||
|
return customSearch(sc, null);
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<String> listOrderedHostsHypervisorVersionsInDatacenter(long datacenterId, HypervisorType hypervisorType) {
|
public List<String> listOrderedHostsHypervisorVersionsInDatacenter(long datacenterId, HypervisorType hypervisorType) {
|
||||||
PreparedStatement pstmt;
|
PreparedStatement pstmt;
|
||||||
|
|
@ -1745,13 +1758,15 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public List<Long> findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(final Long zoneId, final Long clusterId,
|
public List<Long> findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(final Long zoneId,
|
||||||
|
final Long clusterId, final Long managementServerId,
|
||||||
final List<ResourceState> resourceStates, final List<Type> types,
|
final List<ResourceState> resourceStates, final List<Type> types,
|
||||||
final List<Hypervisor.HypervisorType> hypervisorTypes) {
|
final List<Hypervisor.HypervisorType> hypervisorTypes) {
|
||||||
GenericSearchBuilder<HostVO, Long> sb = createSearchBuilder(Long.class);
|
GenericSearchBuilder<HostVO, Long> sb = createSearchBuilder(Long.class);
|
||||||
sb.selectFields(sb.entity().getId());
|
sb.selectFields(sb.entity().getId());
|
||||||
sb.and("zoneId", sb.entity().getDataCenterId(), SearchCriteria.Op.EQ);
|
sb.and("zoneId", sb.entity().getDataCenterId(), SearchCriteria.Op.EQ);
|
||||||
sb.and("clusterId", sb.entity().getClusterId(), SearchCriteria.Op.EQ);
|
sb.and("clusterId", sb.entity().getClusterId(), SearchCriteria.Op.EQ);
|
||||||
|
sb.and("msId", sb.entity().getManagementServerId(), SearchCriteria.Op.EQ);
|
||||||
sb.and("resourceState", sb.entity().getResourceState(), SearchCriteria.Op.IN);
|
sb.and("resourceState", sb.entity().getResourceState(), SearchCriteria.Op.IN);
|
||||||
sb.and("type", sb.entity().getType(), SearchCriteria.Op.IN);
|
sb.and("type", sb.entity().getType(), SearchCriteria.Op.IN);
|
||||||
if (CollectionUtils.isNotEmpty(hypervisorTypes)) {
|
if (CollectionUtils.isNotEmpty(hypervisorTypes)) {
|
||||||
|
|
@ -1767,6 +1782,9 @@ public class HostDaoImpl extends GenericDaoBase<HostVO, Long> implements HostDao
|
||||||
if (clusterId != null) {
|
if (clusterId != null) {
|
||||||
sc.setParameters("clusterId", clusterId);
|
sc.setParameters("clusterId", clusterId);
|
||||||
}
|
}
|
||||||
|
if (managementServerId != null) {
|
||||||
|
sc.setParameters("msId", managementServerId);
|
||||||
|
}
|
||||||
if (CollectionUtils.isNotEmpty(hypervisorTypes)) {
|
if (CollectionUtils.isNotEmpty(hypervisorTypes)) {
|
||||||
sc.setParameters("hypervisorTypes", hypervisorTypes.toArray());
|
sc.setParameters("hypervisorTypes", hypervisorTypes.toArray());
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -104,6 +104,7 @@ public class HostDaoImplTest {
|
||||||
public void testFindHostIdsByZoneClusterResourceStateTypeAndHypervisorType() {
|
public void testFindHostIdsByZoneClusterResourceStateTypeAndHypervisorType() {
|
||||||
Long zoneId = 1L;
|
Long zoneId = 1L;
|
||||||
Long clusterId = 2L;
|
Long clusterId = 2L;
|
||||||
|
Long msId = 1L;
|
||||||
List<ResourceState> resourceStates = List.of(ResourceState.Enabled);
|
List<ResourceState> resourceStates = List.of(ResourceState.Enabled);
|
||||||
List<Host.Type> types = List.of(Host.Type.Routing);
|
List<Host.Type> types = List.of(Host.Type.Routing);
|
||||||
List<Hypervisor.HypervisorType> hypervisorTypes = List.of(Hypervisor.HypervisorType.KVM);
|
List<Hypervisor.HypervisorType> hypervisorTypes = List.of(Hypervisor.HypervisorType.KVM);
|
||||||
|
|
@ -117,10 +118,11 @@ public class HostDaoImplTest {
|
||||||
Mockito.doReturn(sb).when(hostDao).createSearchBuilder(Long.class);
|
Mockito.doReturn(sb).when(hostDao).createSearchBuilder(Long.class);
|
||||||
Mockito.doReturn(mockResults).when(hostDao).customSearch(Mockito.any(SearchCriteria.class), Mockito.any());
|
Mockito.doReturn(mockResults).when(hostDao).customSearch(Mockito.any(SearchCriteria.class), Mockito.any());
|
||||||
List<Long> hostIds = hostDao.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(
|
List<Long> hostIds = hostDao.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(
|
||||||
zoneId, clusterId, resourceStates, types, hypervisorTypes);
|
zoneId, clusterId, msId, resourceStates, types, hypervisorTypes);
|
||||||
Assert.assertEquals(mockResults, hostIds);
|
Assert.assertEquals(mockResults, hostIds);
|
||||||
Mockito.verify(sc).setParameters("zoneId", zoneId);
|
Mockito.verify(sc).setParameters("zoneId", zoneId);
|
||||||
Mockito.verify(sc).setParameters("clusterId", clusterId);
|
Mockito.verify(sc).setParameters("clusterId", clusterId);
|
||||||
|
Mockito.verify(sc).setParameters("msId", msId);
|
||||||
Mockito.verify(sc).setParameters("resourceState", resourceStates.toArray());
|
Mockito.verify(sc).setParameters("resourceState", resourceStates.toArray());
|
||||||
Mockito.verify(sc).setParameters("type", types.toArray());
|
Mockito.verify(sc).setParameters("type", types.toArray());
|
||||||
Mockito.verify(sc).setParameters("hypervisorTypes", hypervisorTypes.toArray());
|
Mockito.verify(sc).setParameters("hypervisorTypes", hypervisorTypes.toArray());
|
||||||
|
|
|
||||||
|
|
@ -237,7 +237,7 @@ public class AsyncJobManagerImpl extends ManagerBase implements AsyncJobManager,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
throw new CloudRuntimeException("Maintenance or Shutdown has been initiated on this management server. Can not accept new jobs");
|
throw new CloudRuntimeException("Maintenance or Shutdown has been initiated on this management server. Can not accept new async jobs");
|
||||||
}
|
}
|
||||||
|
|
||||||
private boolean checkSyncQueueItemAllowed(SyncQueueItemVO item) {
|
private boolean checkSyncQueueItemAllowed(SyncQueueItemVO item) {
|
||||||
|
|
|
||||||
|
|
@ -18,6 +18,10 @@
|
||||||
package org.apache.cloudstack.maintenance;
|
package org.apache.cloudstack.maintenance;
|
||||||
|
|
||||||
public interface ManagementServerMaintenanceListener {
|
public interface ManagementServerMaintenanceListener {
|
||||||
|
void onManagementServerPreparingForMaintenance();
|
||||||
|
|
||||||
|
void onManagementServerCancelPreparingForMaintenance();
|
||||||
|
|
||||||
void onManagementServerMaintenance();
|
void onManagementServerMaintenance();
|
||||||
|
|
||||||
void onManagementServerCancelMaintenance();
|
void onManagementServerCancelMaintenance();
|
||||||
|
|
|
||||||
|
|
@ -44,6 +44,10 @@ public interface ManagementServerMaintenanceManager {
|
||||||
|
|
||||||
void unregisterListener(ManagementServerMaintenanceListener listener);
|
void unregisterListener(ManagementServerMaintenanceListener listener);
|
||||||
|
|
||||||
|
void onPreparingForMaintenance();
|
||||||
|
|
||||||
|
void onCancelPreparingForMaintenance();
|
||||||
|
|
||||||
void onMaintenance();
|
void onMaintenance();
|
||||||
|
|
||||||
void onCancelMaintenance();
|
void onCancelMaintenance();
|
||||||
|
|
|
||||||
|
|
@ -53,6 +53,9 @@ import com.cloud.agent.api.Command;
|
||||||
import com.cloud.cluster.ClusterManager;
|
import com.cloud.cluster.ClusterManager;
|
||||||
import com.cloud.cluster.ManagementServerHostVO;
|
import com.cloud.cluster.ManagementServerHostVO;
|
||||||
import com.cloud.cluster.dao.ManagementServerHostDao;
|
import com.cloud.cluster.dao.ManagementServerHostDao;
|
||||||
|
import com.cloud.event.ActionEvent;
|
||||||
|
import com.cloud.event.EventTypes;
|
||||||
|
import com.cloud.host.HostVO;
|
||||||
import com.cloud.host.dao.HostDao;
|
import com.cloud.host.dao.HostDao;
|
||||||
import com.cloud.serializer.GsonHelper;
|
import com.cloud.serializer.GsonHelper;
|
||||||
import com.cloud.utils.StringUtils;
|
import com.cloud.utils.StringUtils;
|
||||||
|
|
@ -108,6 +111,25 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public boolean stop() {
|
||||||
|
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
||||||
|
if (msHost != null) {
|
||||||
|
updateLastManagementServerForHosts(msHost.getMsid());
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private void updateLastManagementServerForHosts(long msId) {
|
||||||
|
List<HostVO> hosts = hostDao.listHostsByMs(msId);
|
||||||
|
for (HostVO host : hosts) {
|
||||||
|
if (host != null) {
|
||||||
|
host.setLastManagementServerId(msId);
|
||||||
|
hostDao.update(host.getId(), host);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void registerListener(ManagementServerMaintenanceListener listener) {
|
public void registerListener(ManagementServerMaintenanceListener listener) {
|
||||||
synchronized (_listeners) {
|
synchronized (_listeners) {
|
||||||
|
|
@ -124,6 +146,26 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onPreparingForMaintenance() {
|
||||||
|
synchronized (_listeners) {
|
||||||
|
for (final ManagementServerMaintenanceListener listener : _listeners) {
|
||||||
|
logger.info("Invoke, on preparing for maintenance for listener " + listener.getClass());
|
||||||
|
listener.onManagementServerPreparingForMaintenance();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
public void onCancelPreparingForMaintenance() {
|
||||||
|
synchronized (_listeners) {
|
||||||
|
for (final ManagementServerMaintenanceListener listener : _listeners) {
|
||||||
|
logger.info("Invoke, on cancel preparing for maintenance for listener " + listener.getClass());
|
||||||
|
listener.onManagementServerCancelPreparingForMaintenance();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
public void onMaintenance() {
|
public void onMaintenance() {
|
||||||
synchronized (_listeners) {
|
synchronized (_listeners) {
|
||||||
|
|
@ -243,6 +285,7 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
this.maintenanceStartTime = System.currentTimeMillis();
|
this.maintenanceStartTime = System.currentTimeMillis();
|
||||||
this.lbAlgorithm = lbAlorithm;
|
this.lbAlgorithm = lbAlorithm;
|
||||||
jobManager.disableAsyncJobs();
|
jobManager.disableAsyncJobs();
|
||||||
|
onPreparingForMaintenance();
|
||||||
waitForPendingJobs();
|
waitForPendingJobs();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -257,8 +300,13 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
jobManager.enableAsyncJobs();
|
jobManager.enableAsyncJobs();
|
||||||
cancelWaitForPendingJobs();
|
cancelWaitForPendingJobs();
|
||||||
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
ManagementServerHostVO msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
||||||
if (msHost != null && State.Maintenance.equals(msHost.getState())) {
|
if (msHost != null) {
|
||||||
onCancelMaintenance();
|
if (State.PreparingForMaintenance.equals(msHost.getState())) {
|
||||||
|
onCancelPreparingForMaintenance();
|
||||||
|
}
|
||||||
|
if (State.Maintenance.equals(msHost.getState())) {
|
||||||
|
onCancelMaintenance();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -284,6 +332,7 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ActionEvent(eventType = EventTypes.EVENT_MS_SHUTDOWN_PREPARE, eventDescription = "preparing for shutdown")
|
||||||
public ManagementServerMaintenanceResponse prepareForShutdown(PrepareForShutdownCmd cmd) {
|
public ManagementServerMaintenanceResponse prepareForShutdown(PrepareForShutdownCmd cmd) {
|
||||||
ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId());
|
ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId());
|
||||||
if (msHost == null) {
|
if (msHost == null) {
|
||||||
|
|
@ -294,19 +343,18 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
throw new CloudRuntimeException("Management server is not in the right state to prepare for shutdown");
|
throw new CloudRuntimeException("Management server is not in the right state to prepare for shutdown");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
checkAnyMsInPreparingStates("prepare for shutdown");
|
||||||
|
|
||||||
final Command[] cmds = new Command[1];
|
final Command[] cmds = new Command[1];
|
||||||
cmds[0] = new PrepareForShutdownManagementServerHostCommand(msHost.getMsid());
|
cmds[0] = new PrepareForShutdownManagementServerHostCommand(msHost.getMsid());
|
||||||
String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true);
|
executeCmd(msHost, cmds);
|
||||||
logger.info("PrepareForShutdownCmd result : " + result);
|
|
||||||
if (!result.startsWith("Success")) {
|
|
||||||
throw new CloudRuntimeException(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
msHostDao.updateState(msHost.getId(), State.PreparingForShutDown);
|
msHostDao.updateState(msHost.getId(), State.PreparingForShutDown);
|
||||||
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ActionEvent(eventType = EventTypes.EVENT_MS_SHUTDOWN, eventDescription = "triggering shutdown")
|
||||||
public ManagementServerMaintenanceResponse triggerShutdown(TriggerShutdownCmd cmd) {
|
public ManagementServerMaintenanceResponse triggerShutdown(TriggerShutdownCmd cmd) {
|
||||||
ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId());
|
ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId());
|
||||||
if (msHost == null) {
|
if (msHost == null) {
|
||||||
|
|
@ -319,22 +367,20 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
}
|
}
|
||||||
|
|
||||||
if (State.Up.equals(msHost.getState())) {
|
if (State.Up.equals(msHost.getState())) {
|
||||||
|
checkAnyMsInPreparingStates("trigger shutdown");
|
||||||
msHostDao.updateState(msHost.getId(), State.PreparingForShutDown);
|
msHostDao.updateState(msHost.getId(), State.PreparingForShutDown);
|
||||||
}
|
}
|
||||||
|
|
||||||
final Command[] cmds = new Command[1];
|
final Command[] cmds = new Command[1];
|
||||||
cmds[0] = new TriggerShutdownManagementServerHostCommand(msHost.getMsid());
|
cmds[0] = new TriggerShutdownManagementServerHostCommand(msHost.getMsid());
|
||||||
String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true);
|
executeCmd(msHost, cmds);
|
||||||
logger.info("TriggerShutdownCmd result : " + result);
|
|
||||||
if (!result.startsWith("Success")) {
|
|
||||||
throw new CloudRuntimeException(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
msHostDao.updateState(msHost.getId(), State.ShuttingDown);
|
msHostDao.updateState(msHost.getId(), State.ShuttingDown);
|
||||||
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ActionEvent(eventType = EventTypes.EVENT_MS_SHUTDOWN_CANCEL, eventDescription = "cancelling shutdown")
|
||||||
public ManagementServerMaintenanceResponse cancelShutdown(CancelShutdownCmd cmd) {
|
public ManagementServerMaintenanceResponse cancelShutdown(CancelShutdownCmd cmd) {
|
||||||
ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId());
|
ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId());
|
||||||
if (msHost == null) {
|
if (msHost == null) {
|
||||||
|
|
@ -347,17 +393,14 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
|
|
||||||
final Command[] cmds = new Command[1];
|
final Command[] cmds = new Command[1];
|
||||||
cmds[0] = new CancelShutdownManagementServerHostCommand(msHost.getMsid());
|
cmds[0] = new CancelShutdownManagementServerHostCommand(msHost.getMsid());
|
||||||
String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true);
|
executeCmd(msHost, cmds);
|
||||||
logger.info("CancelShutdownCmd result : " + result);
|
|
||||||
if (!result.startsWith("Success")) {
|
|
||||||
throw new CloudRuntimeException(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
msHostDao.updateState(msHost.getId(), State.Up);
|
msHostDao.updateState(msHost.getId(), State.Up);
|
||||||
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ActionEvent(eventType = EventTypes.EVENT_MS_MAINTENANCE_PREPARE, eventDescription = "preparing for maintenance")
|
||||||
public ManagementServerMaintenanceResponse prepareForMaintenance(PrepareForMaintenanceCmd cmd) {
|
public ManagementServerMaintenanceResponse prepareForMaintenance(PrepareForMaintenanceCmd cmd) {
|
||||||
if (StringUtils.isNotBlank(cmd.getAlgorithm())) {
|
if (StringUtils.isNotBlank(cmd.getAlgorithm())) {
|
||||||
indirectAgentLB.checkLBAlgorithmName(cmd.getAlgorithm());
|
indirectAgentLB.checkLBAlgorithmName(cmd.getAlgorithm());
|
||||||
|
|
@ -381,10 +424,7 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
throw new CloudRuntimeException("Management server is not in the right state to prepare for maintenance");
|
throw new CloudRuntimeException("Management server is not in the right state to prepare for maintenance");
|
||||||
}
|
}
|
||||||
|
|
||||||
final List<ManagementServerHostVO> preparingForMaintenanceMsList = msHostDao.listBy(State.PreparingForMaintenance);
|
checkAnyMsInPreparingStates("prepare for maintenance");
|
||||||
if (CollectionUtils.isNotEmpty(preparingForMaintenanceMsList)) {
|
|
||||||
throw new CloudRuntimeException("Cannot prepare for maintenance, there are other management servers preparing for maintenance");
|
|
||||||
}
|
|
||||||
|
|
||||||
if (indirectAgentLB.haveAgentBasedHosts(msHost.getMsid())) {
|
if (indirectAgentLB.haveAgentBasedHosts(msHost.getMsid())) {
|
||||||
List<String> indirectAgentMsList = indirectAgentLB.getManagementServerList();
|
List<String> indirectAgentMsList = indirectAgentLB.getManagementServerList();
|
||||||
|
|
@ -396,23 +436,16 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
List<String> lastAgents = hostDao.listByMs(cmd.getManagementServerId());
|
|
||||||
agentMgr.setLastAgents(lastAgents);
|
|
||||||
|
|
||||||
final Command[] cmds = new Command[1];
|
final Command[] cmds = new Command[1];
|
||||||
cmds[0] = new PrepareForMaintenanceManagementServerHostCommand(msHost.getMsid(), cmd.getAlgorithm());
|
cmds[0] = new PrepareForMaintenanceManagementServerHostCommand(msHost.getMsid(), cmd.getAlgorithm());
|
||||||
String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true);
|
executeCmd(msHost, cmds);
|
||||||
logger.info("PrepareForMaintenanceCmd result : " + result);
|
|
||||||
if (!result.startsWith("Success")) {
|
|
||||||
agentMgr.setLastAgents(null);
|
|
||||||
throw new CloudRuntimeException(result);
|
|
||||||
}
|
|
||||||
|
|
||||||
msHostDao.updateState(msHost.getId(), State.PreparingForMaintenance);
|
msHostDao.updateState(msHost.getId(), State.PreparingForMaintenance);
|
||||||
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
@ActionEvent(eventType = EventTypes.EVENT_MS_MAINTENANCE_CANCEL, eventDescription = "cancelling maintenance")
|
||||||
public ManagementServerMaintenanceResponse cancelMaintenance(CancelMaintenanceCmd cmd) {
|
public ManagementServerMaintenanceResponse cancelMaintenance(CancelMaintenanceCmd cmd) {
|
||||||
ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId());
|
ManagementServerHostVO msHost = msHostDao.findById(cmd.getManagementServerId());
|
||||||
if (msHost == null) {
|
if (msHost == null) {
|
||||||
|
|
@ -425,15 +458,29 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
|
|
||||||
final Command[] cmds = new Command[1];
|
final Command[] cmds = new Command[1];
|
||||||
cmds[0] = new CancelMaintenanceManagementServerHostCommand(msHost.getMsid());
|
cmds[0] = new CancelMaintenanceManagementServerHostCommand(msHost.getMsid());
|
||||||
String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), true);
|
executeCmd(msHost, cmds);
|
||||||
logger.info("CancelMaintenanceCmd result : " + result);
|
|
||||||
|
msHostDao.updateState(msHost.getId(), State.Up);
|
||||||
|
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void executeCmd(ManagementServerHostVO msHost, Command[] cmds) {
|
||||||
|
if (msHost == null) {
|
||||||
|
throw new CloudRuntimeException("Management server node not specified, to execute the cmd");
|
||||||
|
}
|
||||||
|
if (cmds == null || cmds.length <= 0) {
|
||||||
|
throw new CloudRuntimeException(String.format("Cmd not specified, to execute on the management server node %s", msHost));
|
||||||
|
}
|
||||||
|
String result = clusterManager.execute(String.valueOf(msHost.getMsid()), 0, gson.toJson(cmds), false);
|
||||||
|
if (result == null) {
|
||||||
|
String msg = String.format("Unable to reach or execute %s on the management server node: %s", cmds[0], msHost);
|
||||||
|
logger.warn(msg);
|
||||||
|
throw new CloudRuntimeException(msg);
|
||||||
|
}
|
||||||
|
logger.info(String.format("Cmd %s - result: %s", cmds[0], result));
|
||||||
if (!result.startsWith("Success")) {
|
if (!result.startsWith("Success")) {
|
||||||
throw new CloudRuntimeException(result);
|
throw new CloudRuntimeException(result);
|
||||||
}
|
}
|
||||||
|
|
||||||
msHostDao.updateState(msHost.getId(), State.Up);
|
|
||||||
agentMgr.setLastAgents(null);
|
|
||||||
return prepareMaintenanceResponse(cmd.getManagementServerId());
|
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -445,9 +492,17 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
if (msHost == null) {
|
if (msHost == null) {
|
||||||
msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
msHost = msHostDao.findByMsid(ManagementServerNode.getManagementServerId());
|
||||||
}
|
}
|
||||||
|
onCancelPreparingForMaintenance();
|
||||||
msHostDao.updateState(msHost.getId(), State.Up);
|
msHostDao.updateState(msHost.getId(), State.Up);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void checkAnyMsInPreparingStates(String operation) {
|
||||||
|
final List<ManagementServerHostVO> preparingForMaintenanceOrShutDownMsList = msHostDao.listBy(State.PreparingForMaintenance, State.PreparingForShutDown);
|
||||||
|
if (CollectionUtils.isNotEmpty(preparingForMaintenanceOrShutDownMsList)) {
|
||||||
|
throw new CloudRuntimeException(String.format("Cannot %s, there are other management servers preparing for maintenance/shutdown", operation));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private ManagementServerMaintenanceResponse prepareMaintenanceResponse(Long managementServerId) {
|
private ManagementServerMaintenanceResponse prepareMaintenanceResponse(Long managementServerId) {
|
||||||
ManagementServerHostVO msHost;
|
ManagementServerHostVO msHost;
|
||||||
Long[] msIds;
|
Long[] msIds;
|
||||||
|
|
@ -465,8 +520,8 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
boolean maintenanceInitiatedForMS = Arrays.asList(maintenanceStates).contains(msHost.getState());
|
boolean maintenanceInitiatedForMS = Arrays.asList(maintenanceStates).contains(msHost.getState());
|
||||||
boolean shutdownTriggeredForMS = Arrays.asList(shutdownStates).contains(msHost.getState());
|
boolean shutdownTriggeredForMS = Arrays.asList(shutdownStates).contains(msHost.getState());
|
||||||
msIds = new Long[]{msHost.getMsid()};
|
msIds = new Long[]{msHost.getMsid()};
|
||||||
List<String> agents = hostDao.listByMs(managementServerId);
|
List<String> agents = hostDao.listByMs(msHost.getMsid());
|
||||||
long agentsCount = hostDao.countByMs(managementServerId);
|
long agentsCount = agents.size();
|
||||||
long pendingJobCount = countPendingJobs(msIds);
|
long pendingJobCount = countPendingJobs(msIds);
|
||||||
return new ManagementServerMaintenanceResponse(msHost.getUuid(), msHost.getState(), maintenanceInitiatedForMS, shutdownTriggeredForMS, pendingJobCount == 0, pendingJobCount, agentsCount, agents);
|
return new ManagementServerMaintenanceResponse(msHost.getUuid(), msHost.getState(), maintenanceInitiatedForMS, shutdownTriggeredForMS, pendingJobCount == 0, pendingJobCount, agentsCount, agents);
|
||||||
}
|
}
|
||||||
|
|
@ -535,7 +590,6 @@ public class ManagementServerMaintenanceManagerImpl extends ManagerBase implemen
|
||||||
// No more pending jobs. Good to terminate
|
// No more pending jobs. Good to terminate
|
||||||
if (managementServerMaintenanceManager.isShutdownTriggered()) {
|
if (managementServerMaintenanceManager.isShutdownTriggered()) {
|
||||||
logger.info("MS is Shutting Down Now");
|
logger.info("MS is Shutting Down Now");
|
||||||
// update state to down ?
|
|
||||||
System.exit(0);
|
System.exit(0);
|
||||||
}
|
}
|
||||||
if (managementServerMaintenanceManager.isPreparingForMaintenance()) {
|
if (managementServerMaintenanceManager.isPreparingForMaintenance()) {
|
||||||
|
|
|
||||||
|
|
@ -17,7 +17,23 @@
|
||||||
|
|
||||||
package org.apache.cloudstack.maintenance;
|
package org.apache.cloudstack.maintenance;
|
||||||
|
|
||||||
|
import static org.mockito.ArgumentMatchers.any;
|
||||||
|
import static org.mockito.ArgumentMatchers.anyBoolean;
|
||||||
|
import static org.mockito.ArgumentMatchers.anyLong;
|
||||||
|
import static org.mockito.ArgumentMatchers.anyString;
|
||||||
|
import static org.mockito.Mockito.mock;
|
||||||
|
|
||||||
|
import java.util.ArrayList;
|
||||||
|
import java.util.List;
|
||||||
|
|
||||||
|
import org.apache.cloudstack.agent.lb.IndirectAgentLB;
|
||||||
|
import org.apache.cloudstack.api.command.CancelMaintenanceCmd;
|
||||||
|
import org.apache.cloudstack.api.command.CancelShutdownCmd;
|
||||||
|
import org.apache.cloudstack.api.command.PrepareForMaintenanceCmd;
|
||||||
|
import org.apache.cloudstack.api.command.PrepareForShutdownCmd;
|
||||||
|
import org.apache.cloudstack.api.command.TriggerShutdownCmd;
|
||||||
import org.apache.cloudstack.framework.jobs.AsyncJobManager;
|
import org.apache.cloudstack.framework.jobs.AsyncJobManager;
|
||||||
|
import org.apache.cloudstack.management.ManagementServerHost;
|
||||||
import org.junit.Assert;
|
import org.junit.Assert;
|
||||||
import org.junit.Test;
|
import org.junit.Test;
|
||||||
import org.junit.runner.RunWith;
|
import org.junit.runner.RunWith;
|
||||||
|
|
@ -27,6 +43,11 @@ import org.mockito.Mockito;
|
||||||
import org.mockito.Spy;
|
import org.mockito.Spy;
|
||||||
import org.mockito.junit.MockitoJUnitRunner;
|
import org.mockito.junit.MockitoJUnitRunner;
|
||||||
|
|
||||||
|
import com.cloud.agent.AgentManager;
|
||||||
|
import com.cloud.cluster.ClusterManager;
|
||||||
|
import com.cloud.cluster.ManagementServerHostVO;
|
||||||
|
import com.cloud.cluster.dao.ManagementServerHostDao;
|
||||||
|
import com.cloud.host.dao.HostDao;
|
||||||
import com.cloud.utils.exception.CloudRuntimeException;
|
import com.cloud.utils.exception.CloudRuntimeException;
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -40,6 +61,21 @@ public class ManagementServerMaintenanceManagerImplTest {
|
||||||
@Mock
|
@Mock
|
||||||
AsyncJobManager jobManagerMock;
|
AsyncJobManager jobManagerMock;
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
IndirectAgentLB indirectAgentLBMock;
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
AgentManager agentManagerMock;
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
ClusterManager clusterManagerMock;
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
HostDao hostDao;
|
||||||
|
|
||||||
|
@Mock
|
||||||
|
ManagementServerHostDao msHostDao;
|
||||||
|
|
||||||
private long prepareCountPendingJobs() {
|
private long prepareCountPendingJobs() {
|
||||||
long expectedCount = 1L;
|
long expectedCount = 1L;
|
||||||
Mockito.doReturn(expectedCount).when(jobManagerMock).countPendingNonPseudoJobs(1L);
|
Mockito.doReturn(expectedCount).when(jobManagerMock).countPendingNonPseudoJobs(1L);
|
||||||
|
|
@ -53,13 +89,6 @@ public class ManagementServerMaintenanceManagerImplTest {
|
||||||
Assert.assertEquals(expectedCount, count);
|
Assert.assertEquals(expectedCount, count);
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
|
||||||
public void cancelShutdown() {
|
|
||||||
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
|
||||||
spy.cancelShutdown();
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void prepareForShutdown() {
|
public void prepareForShutdown() {
|
||||||
Mockito.doNothing().when(jobManagerMock).disableAsyncJobs();
|
Mockito.doNothing().when(jobManagerMock).disableAsyncJobs();
|
||||||
|
|
@ -74,4 +103,463 @@ public class ManagementServerMaintenanceManagerImplTest {
|
||||||
spy.cancelShutdown();
|
spy.cancelShutdown();
|
||||||
Mockito.verify(jobManagerMock).enableAsyncJobs();
|
Mockito.verify(jobManagerMock).enableAsyncJobs();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void cancelShutdown() {
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.cancelShutdown();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void triggerShutdown() {
|
||||||
|
Mockito.doNothing().when(jobManagerMock).disableAsyncJobs();
|
||||||
|
Mockito.lenient().when(spy.isShutdownTriggered()).thenReturn(false);
|
||||||
|
spy.triggerShutdown();
|
||||||
|
Mockito.verify(jobManagerMock).disableAsyncJobs();
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.triggerShutdown();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForShutdownCmdNoMsHost() {
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(null);
|
||||||
|
PrepareForShutdownCmd cmd = mock(PrepareForShutdownCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForShutdown(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForShutdownCmdMsHostWithNonUpState() {
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Maintenance);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost);
|
||||||
|
PrepareForShutdownCmd cmd = mock(PrepareForShutdownCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForShutdown(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForShutdownCmdOtherMsHostsInPreparingState() {
|
||||||
|
ManagementServerHostVO msHost1 = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost1.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||||
|
ManagementServerHostVO msHost2 = mock(ManagementServerHostVO.class);
|
||||||
|
List<ManagementServerHostVO> msHostList = new ArrayList<>();
|
||||||
|
msHostList.add(msHost2);
|
||||||
|
Mockito.when(msHostDao.listBy(any())).thenReturn(msHostList);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
|
||||||
|
PrepareForShutdownCmd cmd = mock(PrepareForShutdownCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForShutdown(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForShutdownCmdNullResponseFromClusterManager() {
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||||
|
List<ManagementServerHostVO> msHostList = new ArrayList<>();
|
||||||
|
Mockito.when(msHostDao.listBy(any())).thenReturn(msHostList);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost);
|
||||||
|
PrepareForShutdownCmd cmd = mock(PrepareForShutdownCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn(null);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForShutdown(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForShutdownCmdFailedResponseFromClusterManager() {
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||||
|
List<ManagementServerHostVO> msHostList = new ArrayList<>();
|
||||||
|
Mockito.when(msHostDao.listBy(any())).thenReturn(msHostList);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost);
|
||||||
|
PrepareForShutdownCmd cmd = mock(PrepareForShutdownCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn("Failed");
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForShutdown(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForShutdownCmdSuccessResponseFromClusterManager() {
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||||
|
Mockito.when(msHostDao.listBy(any())).thenReturn(new ArrayList<>());
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost);
|
||||||
|
Mockito.when(hostDao.listByMs(anyLong())).thenReturn(new ArrayList<>());
|
||||||
|
PrepareForShutdownCmd cmd = mock(PrepareForShutdownCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn("Success");
|
||||||
|
|
||||||
|
spy.prepareForShutdown(cmd);
|
||||||
|
Mockito.verify(clusterManagerMock, Mockito.times(1)).execute(anyString(), anyLong(), anyString(), anyBoolean());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void cancelShutdownCmdNoMsHost() {
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(null);
|
||||||
|
CancelShutdownCmd cmd = mock(CancelShutdownCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.cancelShutdown(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void cancelShutdownCmdMsHostNotInShutdownState() {
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost);
|
||||||
|
CancelShutdownCmd cmd = mock(CancelShutdownCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.cancelShutdown(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void cancelShutdownCmd() {
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.ReadyToShutDown);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost);
|
||||||
|
CancelShutdownCmd cmd = mock(CancelShutdownCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn("Success");
|
||||||
|
|
||||||
|
spy.cancelShutdown(cmd);
|
||||||
|
Mockito.verify(clusterManagerMock, Mockito.times(1)).execute(anyString(), anyLong(), anyString(), anyBoolean());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void triggerShutdownCmdNoMsHost() {
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(null);
|
||||||
|
TriggerShutdownCmd cmd = mock(TriggerShutdownCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.triggerShutdown(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void triggerShutdownCmdMsHostWithNotRightState() {
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.PreparingForMaintenance);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost);
|
||||||
|
TriggerShutdownCmd cmd = mock(TriggerShutdownCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.triggerShutdown(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void triggerShutdownCmdMsInUpStateAndOtherMsHostsInPreparingState() {
|
||||||
|
ManagementServerHostVO msHost1 = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost1.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||||
|
ManagementServerHostVO msHost2 = mock(ManagementServerHostVO.class);
|
||||||
|
List<ManagementServerHostVO> msHostList = new ArrayList<>();
|
||||||
|
msHostList.add(msHost2);
|
||||||
|
Mockito.when(msHostDao.listBy(any())).thenReturn(msHostList);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
|
||||||
|
TriggerShutdownCmd cmd = mock(TriggerShutdownCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.triggerShutdown(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void triggerShutdownCmd() {
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.ReadyToShutDown);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost);
|
||||||
|
TriggerShutdownCmd cmd = mock(TriggerShutdownCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn("Success");
|
||||||
|
|
||||||
|
spy.triggerShutdown(cmd);
|
||||||
|
Mockito.verify(clusterManagerMock, Mockito.times(1)).execute(anyString(), anyLong(), anyString(), anyBoolean());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForMaintenanceAndCancelFromMaintenanceState() {
|
||||||
|
Mockito.doNothing().when(jobManagerMock).disableAsyncJobs();
|
||||||
|
spy.prepareForMaintenance("static");
|
||||||
|
Mockito.verify(jobManagerMock).disableAsyncJobs();
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForMaintenance("static");
|
||||||
|
});
|
||||||
|
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Maintenance);
|
||||||
|
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
|
||||||
|
Mockito.doNothing().when(jobManagerMock).enableAsyncJobs();
|
||||||
|
spy.cancelMaintenance();
|
||||||
|
Mockito.verify(jobManagerMock).enableAsyncJobs();
|
||||||
|
Mockito.verify(spy, Mockito.times(1)).onCancelMaintenance();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForMaintenanceAndCancelFromPreparingForMaintenanceState() {
|
||||||
|
Mockito.doNothing().when(jobManagerMock).disableAsyncJobs();
|
||||||
|
spy.prepareForMaintenance("static");
|
||||||
|
Mockito.verify(jobManagerMock).disableAsyncJobs();
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForMaintenance("static");
|
||||||
|
});
|
||||||
|
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.PreparingForMaintenance);
|
||||||
|
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
|
||||||
|
Mockito.doNothing().when(jobManagerMock).enableAsyncJobs();
|
||||||
|
spy.cancelMaintenance();
|
||||||
|
Mockito.verify(jobManagerMock).enableAsyncJobs();
|
||||||
|
Mockito.verify(spy, Mockito.times(1)).onCancelPreparingForMaintenance();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void cancelMaintenance() {
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.cancelMaintenance();
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void cancelPreparingForMaintenance() {
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHostDao.findByMsid(anyLong())).thenReturn(msHost);
|
||||||
|
|
||||||
|
spy.cancelPreparingForMaintenance(null);
|
||||||
|
Mockito.verify(jobManagerMock).enableAsyncJobs();
|
||||||
|
Mockito.verify(spy, Mockito.times(1)).onCancelPreparingForMaintenance();
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForMaintenanceCmdNoOtherMsHostsWithUpState() {
|
||||||
|
Mockito.when(msHostDao.listBy(any())).thenReturn(new ArrayList<>());
|
||||||
|
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
|
||||||
|
Mockito.when(cmd.getAlgorithm()).thenReturn("test algorithm");
|
||||||
|
Mockito.doNothing().when(indirectAgentLBMock).checkLBAlgorithmName(anyString());
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForMaintenance(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForMaintenanceCmdOnlyOneMsHostsWithUpState() {
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
List<ManagementServerHostVO> msHostList = new ArrayList<>();
|
||||||
|
msHostList.add(msHost);
|
||||||
|
Mockito.when(msHostDao.listBy(ManagementServerHost.State.Up)).thenReturn(msHostList);
|
||||||
|
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
|
||||||
|
Mockito.when(cmd.getAlgorithm()).thenReturn("test algorithm");
|
||||||
|
Mockito.doNothing().when(indirectAgentLBMock).checkLBAlgorithmName(anyString());
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForMaintenance(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForMaintenanceCmdNoMsHost() {
|
||||||
|
ManagementServerHostVO msHost1 = mock(ManagementServerHostVO.class);
|
||||||
|
ManagementServerHostVO msHost2 = mock(ManagementServerHostVO.class);
|
||||||
|
List<ManagementServerHostVO> msHostList = new ArrayList<>();
|
||||||
|
msHostList.add(msHost1);
|
||||||
|
msHostList.add(msHost2);
|
||||||
|
Mockito.when(msHostDao.listBy(ManagementServerHost.State.Up)).thenReturn(msHostList);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(null);
|
||||||
|
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForMaintenance(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForMaintenanceCmdMsHostWithNonUpState() {
|
||||||
|
ManagementServerHostVO msHost1 = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost1.getState()).thenReturn(ManagementServerHost.State.Maintenance);
|
||||||
|
ManagementServerHostVO msHost2 = mock(ManagementServerHostVO.class);
|
||||||
|
List<ManagementServerHostVO> msHostList = new ArrayList<>();
|
||||||
|
msHostList.add(msHost1);
|
||||||
|
msHostList.add(msHost2);
|
||||||
|
Mockito.when(msHostDao.listBy(ManagementServerHost.State.Up)).thenReturn(msHostList);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
|
||||||
|
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForMaintenance(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForMaintenanceCmdOtherMsHostsInPreparingState() {
|
||||||
|
ManagementServerHostVO msHost1 = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost1.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||||
|
ManagementServerHostVO msHost2 = mock(ManagementServerHostVO.class);
|
||||||
|
List<ManagementServerHostVO> msHostList1 = new ArrayList<>();
|
||||||
|
msHostList1.add(msHost1);
|
||||||
|
msHostList1.add(msHost2);
|
||||||
|
ManagementServerHostVO msHost3 = mock(ManagementServerHostVO.class);
|
||||||
|
List<ManagementServerHostVO> msHostList2 = new ArrayList<>();
|
||||||
|
msHostList2.add(msHost3);
|
||||||
|
Mockito.when(msHostDao.listBy(ManagementServerHost.State.Up)).thenReturn(msHostList1);
|
||||||
|
Mockito.when(msHostDao.listBy(ManagementServerHost.State.PreparingForMaintenance, ManagementServerHost.State.PreparingForShutDown)).thenReturn(msHostList2);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
|
||||||
|
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForMaintenance(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForMaintenanceCmdNoIndirectMsHosts() {
|
||||||
|
ManagementServerHostVO msHost1 = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost1.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||||
|
ManagementServerHostVO msHost2 = mock(ManagementServerHostVO.class);
|
||||||
|
List<ManagementServerHostVO> msHostList = new ArrayList<>();
|
||||||
|
msHostList.add(msHost1);
|
||||||
|
msHostList.add(msHost2);
|
||||||
|
Mockito.when(msHostDao.listBy(ManagementServerHost.State.Up)).thenReturn(msHostList);
|
||||||
|
Mockito.when(msHostDao.listBy(ManagementServerHost.State.PreparingForMaintenance, ManagementServerHost.State.PreparingForShutDown)).thenReturn(new ArrayList<>());
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
|
||||||
|
Mockito.when(msHostDao.listNonUpStateMsIPs()).thenReturn(new ArrayList<>());
|
||||||
|
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong())).thenReturn(true);
|
||||||
|
Mockito.when(indirectAgentLBMock.getManagementServerList()).thenReturn(new ArrayList<>());
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForMaintenance(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForMaintenanceCmdNullResponseFromClusterManager() {
|
||||||
|
ManagementServerHostVO msHost1 = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost1.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||||
|
ManagementServerHostVO msHost2 = mock(ManagementServerHostVO.class);
|
||||||
|
List<ManagementServerHostVO> msHostList = new ArrayList<>();
|
||||||
|
msHostList.add(msHost1);
|
||||||
|
msHostList.add(msHost2);
|
||||||
|
Mockito.when(msHostDao.listBy(ManagementServerHost.State.Up)).thenReturn(msHostList);
|
||||||
|
Mockito.when(msHostDao.listBy(ManagementServerHost.State.PreparingForMaintenance, ManagementServerHost.State.PreparingForShutDown)).thenReturn(new ArrayList<>());
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
|
||||||
|
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong())).thenReturn(false);
|
||||||
|
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn(null);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForMaintenance(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForMaintenanceCmdFailedResponseFromClusterManager() {
|
||||||
|
ManagementServerHostVO msHost1 = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost1.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||||
|
ManagementServerHostVO msHost2 = mock(ManagementServerHostVO.class);
|
||||||
|
List<ManagementServerHostVO> msHostList = new ArrayList<>();
|
||||||
|
msHostList.add(msHost1);
|
||||||
|
msHostList.add(msHost2);
|
||||||
|
Mockito.when(msHostDao.listBy(ManagementServerHost.State.Up)).thenReturn(msHostList);
|
||||||
|
Mockito.when(msHostDao.listBy(ManagementServerHost.State.PreparingForMaintenance, ManagementServerHost.State.PreparingForShutDown)).thenReturn(new ArrayList<>());
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
|
||||||
|
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong())).thenReturn(false);
|
||||||
|
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn("Failed");
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.prepareForMaintenance(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void prepareForMaintenanceCmdSuccessResponseFromClusterManager() {
|
||||||
|
ManagementServerHostVO msHost1 = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost1.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||||
|
ManagementServerHostVO msHost2 = mock(ManagementServerHostVO.class);
|
||||||
|
List<ManagementServerHostVO> msHostList = new ArrayList<>();
|
||||||
|
msHostList.add(msHost1);
|
||||||
|
msHostList.add(msHost2);
|
||||||
|
Mockito.when(msHostDao.listBy(ManagementServerHost.State.Up)).thenReturn(msHostList);
|
||||||
|
Mockito.when(msHostDao.listBy(ManagementServerHost.State.PreparingForMaintenance, ManagementServerHost.State.PreparingForShutDown)).thenReturn(new ArrayList<>());
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost1);
|
||||||
|
PrepareForMaintenanceCmd cmd = mock(PrepareForMaintenanceCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
Mockito.when(indirectAgentLBMock.haveAgentBasedHosts(anyLong())).thenReturn(false);
|
||||||
|
Mockito.when(hostDao.listByMs(anyLong())).thenReturn(new ArrayList<>());
|
||||||
|
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn("Success");
|
||||||
|
|
||||||
|
spy.prepareForMaintenance(cmd);
|
||||||
|
Mockito.verify(clusterManagerMock, Mockito.times(1)).execute(anyString(), anyLong(), anyString(), anyBoolean());
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void cancelMaintenanceCmdNoMsHost() {
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(null);
|
||||||
|
CancelMaintenanceCmd cmd = mock(CancelMaintenanceCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.cancelMaintenance(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void cancelMaintenanceCmdMsHostNotInMaintenanceState() {
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Up);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost);
|
||||||
|
CancelMaintenanceCmd cmd = mock(CancelMaintenanceCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
|
||||||
|
Assert.assertThrows(CloudRuntimeException.class, () -> {
|
||||||
|
spy.cancelMaintenance(cmd);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
@Test
|
||||||
|
public void cancelMaintenanceCmd() {
|
||||||
|
ManagementServerHostVO msHost = mock(ManagementServerHostVO.class);
|
||||||
|
Mockito.when(msHost.getState()).thenReturn(ManagementServerHost.State.Maintenance);
|
||||||
|
Mockito.when(msHostDao.findById(1L)).thenReturn(msHost);
|
||||||
|
CancelMaintenanceCmd cmd = mock(CancelMaintenanceCmd.class);
|
||||||
|
Mockito.when(cmd.getManagementServerId()).thenReturn(1L);
|
||||||
|
Mockito.when(clusterManagerMock.execute(anyString(), anyLong(), anyString(), anyBoolean())).thenReturn("Success");
|
||||||
|
|
||||||
|
spy.cancelMaintenance(cmd);
|
||||||
|
Mockito.verify(clusterManagerMock, Mockito.times(1)).execute(anyString(), anyLong(), anyString(), anyBoolean());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -31,11 +31,11 @@ public class ManagementServerMetricsResponse extends ManagementServerResponse {
|
||||||
private Integer availableProcessors;
|
private Integer availableProcessors;
|
||||||
|
|
||||||
@SerializedName(MetricConstants.LAST_AGENTS)
|
@SerializedName(MetricConstants.LAST_AGENTS)
|
||||||
@Param(description = "the last agents this Management Server is responsible for, before preparing for maintenance", since = "4.18.1")
|
@Param(description = "the last agents this Management Server is responsible for, before shutdown or preparing for maintenance", since = "4.21.0.0")
|
||||||
private List<String> lastAgents;
|
private List<String> lastAgents;
|
||||||
|
|
||||||
@SerializedName(MetricConstants.AGENTS)
|
@SerializedName(MetricConstants.AGENTS)
|
||||||
@Param(description = "the agents this Management Server is responsible for", since = "4.18.1")
|
@Param(description = "the agents this Management Server is responsible for", since = "4.21.0.0")
|
||||||
private List<String> agents;
|
private List<String> agents;
|
||||||
|
|
||||||
@SerializedName(MetricConstants.AGENT_COUNT)
|
@SerializedName(MetricConstants.AGENT_COUNT)
|
||||||
|
|
|
||||||
|
|
@ -94,7 +94,7 @@ public class ApiDispatcher {
|
||||||
if (asyncJobManager.isAsyncJobsEnabled()) {
|
if (asyncJobManager.isAsyncJobsEnabled()) {
|
||||||
asyncCreationDispatchChain.dispatch(new DispatchTask(cmd, params));
|
asyncCreationDispatchChain.dispatch(new DispatchTask(cmd, params));
|
||||||
} else {
|
} else {
|
||||||
throw new CloudRuntimeException("Maintenance or Shutdown has been initiated on this management server. Can not accept new jobs");
|
throw new CloudRuntimeException("Maintenance or Shutdown has been initiated on this management server. Can not accept new async creation jobs");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -750,6 +750,11 @@ public class ApiServer extends ManagerBase implements HttpRequestHandler, ApiSer
|
||||||
// BaseAsyncCreateCmd: cmd params are processed and create() is called, then same workflow as BaseAsyncCmd.
|
// BaseAsyncCreateCmd: cmd params are processed and create() is called, then same workflow as BaseAsyncCmd.
|
||||||
// BaseAsyncCmd: cmd is processed and submitted as an AsyncJob, job related info is serialized and returned.
|
// BaseAsyncCmd: cmd is processed and submitted as an AsyncJob, job related info is serialized and returned.
|
||||||
if (cmdObj instanceof BaseAsyncCmd) {
|
if (cmdObj instanceof BaseAsyncCmd) {
|
||||||
|
if (!asyncMgr.isAsyncJobsEnabled()) {
|
||||||
|
String msg = "Maintenance or Shutdown has been initiated on this management server. Can not accept new jobs";
|
||||||
|
logger.warn(msg);
|
||||||
|
throw new ServerApiException(ApiErrorCode.SERVICE_UNAVAILABLE, msg);
|
||||||
|
}
|
||||||
Long objectId = null;
|
Long objectId = null;
|
||||||
String objectUuid = null;
|
String objectUuid = null;
|
||||||
if (cmdObj instanceof BaseAsyncCreateCmd) {
|
if (cmdObj instanceof BaseAsyncCreateCmd) {
|
||||||
|
|
|
||||||
|
|
@ -5445,7 +5445,11 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
|
||||||
mgmtResponse.addPeer(createPeerManagementServerNodeResponse(peer));
|
mgmtResponse.addPeer(createPeerManagementServerNodeResponse(peer));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
mgmtResponse.setAgentsCount((long) hostDao.countByMs(mgmt.getMsid()));
|
List<String> lastAgents = hostDao.listByLastMs(mgmt.getMsid());
|
||||||
|
mgmtResponse.setLastAgents(lastAgents);
|
||||||
|
List<String> agents = hostDao.listByMs(mgmt.getMsid());
|
||||||
|
mgmtResponse.setAgents(agents);
|
||||||
|
mgmtResponse.setAgentsCount((long) agents.size());
|
||||||
mgmtResponse.setPendingJobsCount(jobManager.countPendingNonPseudoJobs(mgmt.getMsid()));
|
mgmtResponse.setPendingJobsCount(jobManager.countPendingNonPseudoJobs(mgmt.getMsid()));
|
||||||
mgmtResponse.setIpAddress(mgmt.getServiceIP());
|
mgmtResponse.setIpAddress(mgmt.getServiceIP());
|
||||||
mgmtResponse.setObjectName("managementserver");
|
mgmtResponse.setObjectName("managementserver");
|
||||||
|
|
|
||||||
|
|
@ -752,21 +752,21 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc
|
||||||
logger.debug(String.format("%s is running...", this.getClass().getSimpleName()));
|
logger.debug(String.format("%s is running...", this.getClass().getSimpleName()));
|
||||||
long msid = ManagementServerNode.getManagementServerId();
|
long msid = ManagementServerNode.getManagementServerId();
|
||||||
ManagementServerHostVO mshost = null;
|
ManagementServerHostVO mshost = null;
|
||||||
ManagementServerHostStatsEntry hostStatsEntry = null;
|
ManagementServerHostStatsEntry msHostStatsEntry = null;
|
||||||
try {
|
try {
|
||||||
mshost = managementServerHostDao.findByMsid(msid);
|
mshost = managementServerHostDao.findByMsid(msid);
|
||||||
// get local data
|
// get local data
|
||||||
hostStatsEntry = getDataFrom(mshost);
|
msHostStatsEntry = getDataFrom(mshost);
|
||||||
managementServerHostStats.put(mshost.getUuid(), hostStatsEntry);
|
managementServerHostStats.put(mshost.getUuid(), msHostStatsEntry);
|
||||||
// send to other hosts
|
// send to other hosts
|
||||||
clusterManager.publishStatus(gson.toJson(hostStatsEntry));
|
clusterManager.publishStatus(gson.toJson(msHostStatsEntry));
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
// pokemon catch to make sure the thread stays running
|
// pokemon catch to make sure the thread stays running
|
||||||
logger.error("Error trying to retrieve management server host statistics", t);
|
logger.error("Error trying to retrieve management server host statistics", t);
|
||||||
}
|
}
|
||||||
try {
|
try {
|
||||||
// send to DB
|
// send to DB
|
||||||
storeStatus(hostStatsEntry, mshost);
|
storeStatus(msHostStatsEntry, mshost);
|
||||||
} catch (Throwable t) {
|
} catch (Throwable t) {
|
||||||
// pokemon catch to make sure the thread stays running
|
// pokemon catch to make sure the thread stays running
|
||||||
logger.error("Error trying to store management server host statistics", t);
|
logger.error("Error trying to store management server host statistics", t);
|
||||||
|
|
@ -834,11 +834,11 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc
|
||||||
}
|
}
|
||||||
|
|
||||||
private void getDataBaseStatistics(ManagementServerHostStatsEntry newEntry, long msid) {
|
private void getDataBaseStatistics(ManagementServerHostStatsEntry newEntry, long msid) {
|
||||||
newEntry.setLastAgents(_agentMgr.getLastAgents());
|
List<String> lastAgents = _hostDao.listByLastMs(msid);
|
||||||
|
newEntry.setLastAgents(lastAgents);
|
||||||
List<String> agents = _hostDao.listByMs(msid);
|
List<String> agents = _hostDao.listByMs(msid);
|
||||||
newEntry.setAgents(agents);
|
newEntry.setAgents(agents);
|
||||||
int count = _hostDao.countByMs(msid);
|
newEntry.setAgentCount(agents.size());
|
||||||
newEntry.setAgentCount(count);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
private void getMemoryData(@NotNull ManagementServerHostStatsEntry newEntry) {
|
private void getMemoryData(@NotNull ManagementServerHostStatsEntry newEntry) {
|
||||||
|
|
|
||||||
|
|
@ -19,10 +19,13 @@ package org.apache.cloudstack.agent.lb;
|
||||||
import java.util.ArrayList;
|
import java.util.ArrayList;
|
||||||
import java.util.Arrays;
|
import java.util.Arrays;
|
||||||
import java.util.Comparator;
|
import java.util.Comparator;
|
||||||
|
import java.util.EnumSet;
|
||||||
import java.util.HashMap;
|
import java.util.HashMap;
|
||||||
import java.util.List;
|
import java.util.List;
|
||||||
import java.util.Map;
|
import java.util.Map;
|
||||||
import java.util.EnumSet;
|
import java.util.concurrent.ExecutorService;
|
||||||
|
import java.util.concurrent.Executors;
|
||||||
|
import java.util.concurrent.TimeUnit;
|
||||||
|
|
||||||
import javax.inject.Inject;
|
import javax.inject.Inject;
|
||||||
import javax.naming.ConfigurationException;
|
import javax.naming.ConfigurationException;
|
||||||
|
|
@ -33,6 +36,8 @@ import org.apache.cloudstack.agent.lb.algorithm.IndirectAgentLBStaticAlgorithm;
|
||||||
import org.apache.cloudstack.config.ApiServiceConfiguration;
|
import org.apache.cloudstack.config.ApiServiceConfiguration;
|
||||||
import org.apache.cloudstack.framework.config.ConfigKey;
|
import org.apache.cloudstack.framework.config.ConfigKey;
|
||||||
import org.apache.cloudstack.framework.config.Configurable;
|
import org.apache.cloudstack.framework.config.Configurable;
|
||||||
|
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
|
||||||
|
import org.apache.commons.collections.CollectionUtils;
|
||||||
import org.apache.commons.lang3.StringUtils;
|
import org.apache.commons.lang3.StringUtils;
|
||||||
|
|
||||||
import com.cloud.agent.AgentManager;
|
import com.cloud.agent.AgentManager;
|
||||||
|
|
@ -40,6 +45,7 @@ import com.cloud.agent.api.Answer;
|
||||||
import com.cloud.agent.api.MigrateAgentConnectionCommand;
|
import com.cloud.agent.api.MigrateAgentConnectionCommand;
|
||||||
import com.cloud.cluster.ManagementServerHostVO;
|
import com.cloud.cluster.ManagementServerHostVO;
|
||||||
import com.cloud.cluster.dao.ManagementServerHostDao;
|
import com.cloud.cluster.dao.ManagementServerHostDao;
|
||||||
|
import com.cloud.dc.DataCenter;
|
||||||
import com.cloud.dc.DataCenterVO;
|
import com.cloud.dc.DataCenterVO;
|
||||||
import com.cloud.dc.dao.ClusterDao;
|
import com.cloud.dc.dao.ClusterDao;
|
||||||
import com.cloud.dc.dao.DataCenterDao;
|
import com.cloud.dc.dao.DataCenterDao;
|
||||||
|
|
@ -49,20 +55,20 @@ import com.cloud.host.dao.HostDao;
|
||||||
import com.cloud.hypervisor.Hypervisor;
|
import com.cloud.hypervisor.Hypervisor;
|
||||||
import com.cloud.resource.ResourceState;
|
import com.cloud.resource.ResourceState;
|
||||||
import com.cloud.utils.component.ComponentLifecycleBase;
|
import com.cloud.utils.component.ComponentLifecycleBase;
|
||||||
|
import com.cloud.utils.concurrency.NamedThreadFactory;
|
||||||
import com.cloud.utils.exception.CloudRuntimeException;
|
import com.cloud.utils.exception.CloudRuntimeException;
|
||||||
|
|
||||||
import org.apache.commons.collections.CollectionUtils;
|
|
||||||
|
|
||||||
public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implements IndirectAgentLB, Configurable {
|
public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implements IndirectAgentLB, Configurable {
|
||||||
|
|
||||||
public static final ConfigKey<String> IndirectAgentLBAlgorithm = new ConfigKey<>(String.class,
|
public static final ConfigKey<String> IndirectAgentLBAlgorithm = new ConfigKey<>(String.class,
|
||||||
"indirect.agent.lb.algorithm", "Advanced", "static",
|
"indirect.agent.lb.algorithm", "Advanced", "static",
|
||||||
"The algorithm to be applied on the provided 'host' management server list that is sent to indirect agents. Allowed values are: static, roundrobin and shuffle.",
|
"The algorithm to be applied on the provided management server list in the 'host' config that that is sent to indirect agents. Allowed values are: static, roundrobin and shuffle.",
|
||||||
true, ConfigKey.Scope.Global, null, null, null, null, null, ConfigKey.Kind.Select, "static,roundrobin,shuffle");
|
true, ConfigKey.Scope.Global, null, null, null, null, null, ConfigKey.Kind.Select, "static,roundrobin,shuffle");
|
||||||
|
|
||||||
public static final ConfigKey<Long> IndirectAgentLBCheckInterval = new ConfigKey<>("Advanced", Long.class,
|
public static final ConfigKey<Long> IndirectAgentLBCheckInterval = new ConfigKey<>("Advanced", Long.class,
|
||||||
"indirect.agent.lb.check.interval", "0",
|
"indirect.agent.lb.check.interval", "0",
|
||||||
"The interval in seconds after which agent should check and try to connect to its preferred host. Set 0 to disable it.",
|
"The interval in seconds after which indirect agent should check and try to connect to its preferred host (the first management server from the propagated list provided in the 'host' config)." +
|
||||||
|
" Set 0 to disable it.",
|
||||||
true, ConfigKey.Scope.Cluster);
|
true, ConfigKey.Scope.Cluster);
|
||||||
|
|
||||||
private static Map<String, org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm> algorithmMap = new HashMap<>();
|
private static Map<String, org.apache.cloudstack.agent.lb.IndirectAgentLBAlgorithm> algorithmMap = new HashMap<>();
|
||||||
|
|
@ -85,6 +91,8 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||||
ResourceState.ErrorInMaintenance, ResourceState.PrepareForMaintenance);
|
ResourceState.ErrorInMaintenance, ResourceState.PrepareForMaintenance);
|
||||||
private static final List<Host.Type> agentValidHostTypes = List.of(Host.Type.Routing, Host.Type.ConsoleProxy,
|
private static final List<Host.Type> agentValidHostTypes = List.of(Host.Type.Routing, Host.Type.ConsoleProxy,
|
||||||
Host.Type.SecondaryStorage, Host.Type.SecondaryStorageVM);
|
Host.Type.SecondaryStorage, Host.Type.SecondaryStorageVM);
|
||||||
|
private static final List<Host.Type> agentNonRoutingHostTypes = List.of(Host.Type.ConsoleProxy,
|
||||||
|
Host.Type.SecondaryStorage, Host.Type.SecondaryStorageVM);
|
||||||
private static final List<Hypervisor.HypervisorType> agentValidHypervisorTypes = List.of(
|
private static final List<Hypervisor.HypervisorType> agentValidHypervisorTypes = List.of(
|
||||||
Hypervisor.HypervisorType.KVM, Hypervisor.HypervisorType.LXC);
|
Hypervisor.HypervisorType.KVM, Hypervisor.HypervisorType.LXC);
|
||||||
|
|
||||||
|
|
@ -246,8 +254,18 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||||
agentBasedHosts.add(host);
|
agentBasedHosts.add(host);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private List<Long> getAllAgentBasedNonRoutingHostsFromDB(final Long zoneId, final Long msId) {
|
||||||
|
return hostDao.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(zoneId, null, msId,
|
||||||
|
agentValidResourceStates, agentNonRoutingHostTypes, agentValidHypervisorTypes);
|
||||||
|
}
|
||||||
|
|
||||||
|
private List<Long> getAllAgentBasedRoutingHostsFromDB(final Long zoneId, final Long clusterId, final Long msId) {
|
||||||
|
return hostDao.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(zoneId, clusterId, msId,
|
||||||
|
agentValidResourceStates, List.of(Host.Type.Routing), agentValidHypervisorTypes);
|
||||||
|
}
|
||||||
|
|
||||||
private List<Long> getAllAgentBasedHostsFromDB(final Long zoneId, final Long clusterId) {
|
private List<Long> getAllAgentBasedHostsFromDB(final Long zoneId, final Long clusterId) {
|
||||||
return hostDao.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(zoneId, clusterId,
|
return hostDao.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(zoneId, clusterId, null,
|
||||||
agentValidResourceStates, agentValidHostTypes, agentValidHypervisorTypes);
|
agentValidResourceStates, agentValidHostTypes, agentValidHypervisorTypes);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -287,31 +305,159 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||||
@Override
|
@Override
|
||||||
public void propagateMSListToAgents() {
|
public void propagateMSListToAgents() {
|
||||||
logger.debug("Propagating management server list update to agents");
|
logger.debug("Propagating management server list update to agents");
|
||||||
|
ExecutorService setupMSListExecutorService = Executors.newFixedThreadPool(10, new NamedThreadFactory("SetupMSList-Worker"));
|
||||||
final String lbAlgorithm = getLBAlgorithmName();
|
final String lbAlgorithm = getLBAlgorithmName();
|
||||||
|
final Long globalLbCheckInterval = getLBPreferredHostCheckInterval(null);
|
||||||
List<DataCenterVO> zones = dataCenterDao.listAll();
|
List<DataCenterVO> zones = dataCenterDao.listAll();
|
||||||
for (DataCenterVO zone : zones) {
|
for (DataCenterVO zone : zones) {
|
||||||
List<Long> zoneHostIds = new ArrayList<>();
|
List<Long> zoneHostIds = new ArrayList<>();
|
||||||
|
List<Long> nonRoutingHostIds = getAllAgentBasedNonRoutingHostsFromDB(zone.getId(), null);
|
||||||
|
zoneHostIds.addAll(nonRoutingHostIds);
|
||||||
Map<Long, List<Long>> clusterHostIdsMap = new HashMap<>();
|
Map<Long, List<Long>> clusterHostIdsMap = new HashMap<>();
|
||||||
List<Long> clusterIds = clusterDao.listAllClusterIds(zone.getId());
|
List<Long> clusterIds = clusterDao.listAllClusterIds(zone.getId());
|
||||||
for (Long clusterId : clusterIds) {
|
for (Long clusterId : clusterIds) {
|
||||||
List<Long> hostIds = getAllAgentBasedHostsFromDB(zone.getId(), clusterId);
|
List<Long> hostIds = getAllAgentBasedRoutingHostsFromDB(zone.getId(), clusterId, null);
|
||||||
clusterHostIdsMap.put(clusterId, hostIds);
|
clusterHostIdsMap.put(clusterId, hostIds);
|
||||||
zoneHostIds.addAll(hostIds);
|
zoneHostIds.addAll(hostIds);
|
||||||
}
|
}
|
||||||
zoneHostIds.sort(Comparator.comparingLong(x -> x));
|
zoneHostIds.sort(Comparator.comparingLong(x -> x));
|
||||||
|
final List<String> avoidMsList = mshostDao.listNonUpStateMsIPs();
|
||||||
|
for (Long nonRoutingHostId : nonRoutingHostIds) {
|
||||||
|
setupMSListExecutorService.submit(new SetupMSListTask(nonRoutingHostId, zone.getId(), zoneHostIds, avoidMsList, lbAlgorithm, globalLbCheckInterval));
|
||||||
|
}
|
||||||
for (Long clusterId : clusterIds) {
|
for (Long clusterId : clusterIds) {
|
||||||
final Long lbCheckInterval = getLBPreferredHostCheckInterval(clusterId);
|
final Long clusterLbCheckInterval = getLBPreferredHostCheckInterval(clusterId);
|
||||||
List<Long> hostIds = clusterHostIdsMap.get(clusterId);
|
List<Long> hostIds = clusterHostIdsMap.get(clusterId);
|
||||||
for (Long hostId : hostIds) {
|
for (Long hostId : hostIds) {
|
||||||
final List<String> msList = getManagementServerList(hostId, zone.getId(), zoneHostIds);
|
setupMSListExecutorService.submit(new SetupMSListTask(hostId, zone.getId(), zoneHostIds, avoidMsList, lbAlgorithm, clusterLbCheckInterval));
|
||||||
final SetupMSListCommand cmd = new SetupMSListCommand(msList, lbAlgorithm, lbCheckInterval);
|
|
||||||
final Answer answer = agentManager.easySend(hostId, cmd);
|
|
||||||
if (answer == null || !answer.getResult()) {
|
|
||||||
logger.warn("Failed to setup management servers list to the agent of ID: {}", hostId);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
setupMSListExecutorService.shutdown();
|
||||||
|
try {
|
||||||
|
if (!setupMSListExecutorService.awaitTermination(300, TimeUnit.SECONDS)) {
|
||||||
|
setupMSListExecutorService.shutdownNow();
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
setupMSListExecutorService.shutdownNow();
|
||||||
|
logger.debug(String.format("Force shutdown setup ms list service as it did not shutdown in the desired time due to: %s", e.getMessage()));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
private final class SetupMSListTask extends ManagedContextRunnable {
|
||||||
|
private Long hostId;
|
||||||
|
private Long dcId;
|
||||||
|
private List<Long> orderedHostIdList;
|
||||||
|
private List<String> avoidMsList;
|
||||||
|
private String lbAlgorithm;
|
||||||
|
private Long lbCheckInterval;
|
||||||
|
|
||||||
|
public SetupMSListTask(Long hostId, Long dcId, List<Long> orderedHostIdList, List<String> avoidMsList,
|
||||||
|
String lbAlgorithm, Long lbCheckInterval) {
|
||||||
|
this.hostId = hostId;
|
||||||
|
this.dcId = dcId;
|
||||||
|
this.orderedHostIdList = orderedHostIdList;
|
||||||
|
this.avoidMsList = avoidMsList;
|
||||||
|
this.lbAlgorithm = lbAlgorithm;
|
||||||
|
this.lbCheckInterval = lbCheckInterval;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void runInContext() {
|
||||||
|
final List<String> msList = getManagementServerList(hostId, dcId, orderedHostIdList);
|
||||||
|
final SetupMSListCommand cmd = new SetupMSListCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval);
|
||||||
|
cmd.setWait(60);
|
||||||
|
final Answer answer = agentManager.easySend(hostId, cmd);
|
||||||
|
if (answer == null || !answer.getResult()) {
|
||||||
|
logger.warn(String.format("Failed to setup management servers list to the agent of ID: %d", hostId));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean migrateNonRoutingHostAgentsInZone(String fromMsUuid, long fromMsId, DataCenter dc,
|
||||||
|
long migrationStartTimeInMs, long timeoutDurationInMs, final List<String> avoidMsList, String lbAlgorithm,
|
||||||
|
boolean lbAlgorithmChanged, List<Long> orderedHostIdList) {
|
||||||
|
List<Long> systemVmAgentsInDc = getAllAgentBasedNonRoutingHostsFromDB(dc.getId(), fromMsId);
|
||||||
|
if (CollectionUtils.isEmpty(systemVmAgentsInDc)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
logger.debug(String.format("Migrating %d non-routing host agents from management server node %d (id: %s) of zone %s",
|
||||||
|
systemVmAgentsInDc.size(), fromMsId, fromMsUuid, dc));
|
||||||
|
ExecutorService migrateAgentsExecutorService = Executors.newFixedThreadPool(5, new NamedThreadFactory("MigrateNonRoutingHostAgent-Worker"));
|
||||||
|
Long lbCheckInterval = getLBPreferredHostCheckInterval(null);
|
||||||
|
boolean stopMigration = false;
|
||||||
|
for (final Long hostId : systemVmAgentsInDc) {
|
||||||
|
long migrationElapsedTimeInMs = System.currentTimeMillis() - migrationStartTimeInMs;
|
||||||
|
if (migrationElapsedTimeInMs >= timeoutDurationInMs) {
|
||||||
|
logger.debug(String.format("Stop migrating remaining non-routing host agents from management server node %d (id: %s), timed out", fromMsId, fromMsUuid));
|
||||||
|
stopMigration = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
migrateAgentsExecutorService.submit(new MigrateAgentConnectionTask(fromMsId, hostId, dc.getId(), orderedHostIdList, avoidMsList, lbCheckInterval, lbAlgorithm, lbAlgorithmChanged));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stopMigration) {
|
||||||
|
migrateAgentsExecutorService.shutdownNow();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
migrateAgentsExecutorService.shutdown();
|
||||||
|
long pendingTimeoutDurationInMs = timeoutDurationInMs - (System.currentTimeMillis() - migrationStartTimeInMs);
|
||||||
|
try {
|
||||||
|
if (pendingTimeoutDurationInMs <= 0 || !migrateAgentsExecutorService.awaitTermination(pendingTimeoutDurationInMs, TimeUnit.MILLISECONDS)) {
|
||||||
|
migrateAgentsExecutorService.shutdownNow();
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
migrateAgentsExecutorService.shutdownNow();
|
||||||
|
logger.debug(String.format("Force shutdown migrate non-routing agents service as it did not shutdown in the desired time due to: %s", e.getMessage()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
protected boolean migrateRoutingHostAgentsInCluster(long clusterId, String fromMsUuid, long fromMsId, DataCenter dc,
|
||||||
|
long migrationStartTimeInMs, long timeoutDurationInMs, final List<String> avoidMsList, String lbAlgorithm,
|
||||||
|
boolean lbAlgorithmChanged, List<Long> orderedHostIdList) {
|
||||||
|
|
||||||
|
List<Long> agentBasedHostsOfMsInDcAndCluster = getAllAgentBasedRoutingHostsFromDB(dc.getId(), clusterId, fromMsId);
|
||||||
|
if (CollectionUtils.isEmpty(agentBasedHostsOfMsInDcAndCluster)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
logger.debug(String.format("Migrating %d indirect routing host agents from management server node %d (id: %s) of zone %s, " +
|
||||||
|
"cluster ID: %d", agentBasedHostsOfMsInDcAndCluster.size(), fromMsId, fromMsUuid, dc, clusterId));
|
||||||
|
ExecutorService migrateAgentsExecutorService = Executors.newFixedThreadPool(10, new NamedThreadFactory("MigrateRoutingHostAgent-Worker"));
|
||||||
|
Long lbCheckInterval = getLBPreferredHostCheckInterval(clusterId);
|
||||||
|
boolean stopMigration = false;
|
||||||
|
for (final Long hostId : agentBasedHostsOfMsInDcAndCluster) {
|
||||||
|
long migrationElapsedTimeInMs = System.currentTimeMillis() - migrationStartTimeInMs;
|
||||||
|
if (migrationElapsedTimeInMs >= timeoutDurationInMs) {
|
||||||
|
logger.debug(String.format("Stop migrating remaining indirect routing host agents from management server node %d (id: %s), timed out", fromMsId, fromMsUuid));
|
||||||
|
stopMigration = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
|
||||||
|
migrateAgentsExecutorService.submit(new MigrateAgentConnectionTask(fromMsId, hostId, dc.getId(), orderedHostIdList, avoidMsList, lbCheckInterval, lbAlgorithm, lbAlgorithmChanged));
|
||||||
|
}
|
||||||
|
|
||||||
|
if (stopMigration) {
|
||||||
|
migrateAgentsExecutorService.shutdownNow();
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
|
||||||
|
migrateAgentsExecutorService.shutdown();
|
||||||
|
long pendingTimeoutDurationInMs = timeoutDurationInMs - (System.currentTimeMillis() - migrationStartTimeInMs);
|
||||||
|
try {
|
||||||
|
if (pendingTimeoutDurationInMs <= 0 || !migrateAgentsExecutorService.awaitTermination(pendingTimeoutDurationInMs, TimeUnit.MILLISECONDS)) {
|
||||||
|
migrateAgentsExecutorService.shutdownNow();
|
||||||
|
}
|
||||||
|
} catch (InterruptedException e) {
|
||||||
|
migrateAgentsExecutorService.shutdownNow();
|
||||||
|
logger.debug(String.format("Force shutdown migrate routing agents service as it did not shutdown in the desired time due to: %s", e.getMessage()));
|
||||||
|
}
|
||||||
|
|
||||||
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
@Override
|
@Override
|
||||||
|
|
@ -322,7 +468,7 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||||
}
|
}
|
||||||
|
|
||||||
logger.debug(String.format("Migrating indirect agents from management server node %d (id: %s) to other nodes", fromMsId, fromMsUuid));
|
logger.debug(String.format("Migrating indirect agents from management server node %d (id: %s) to other nodes", fromMsId, fromMsUuid));
|
||||||
long migrationStartTime = System.currentTimeMillis();
|
long migrationStartTimeInMs = System.currentTimeMillis();
|
||||||
if (!haveAgentBasedHosts(fromMsId)) {
|
if (!haveAgentBasedHosts(fromMsId)) {
|
||||||
logger.info(String.format("No indirect agents available on management server node %d (id: %s), to migrate", fromMsId, fromMsUuid));
|
logger.info(String.format("No indirect agents available on management server node %d (id: %s), to migrate", fromMsId, fromMsUuid));
|
||||||
return true;
|
return true;
|
||||||
|
|
@ -342,37 +488,75 @@ public class IndirectAgentLBServiceImpl extends ComponentLifecycleBase implement
|
||||||
|
|
||||||
List<DataCenterVO> dataCenterList = dcDao.listAll();
|
List<DataCenterVO> dataCenterList = dcDao.listAll();
|
||||||
for (DataCenterVO dc : dataCenterList) {
|
for (DataCenterVO dc : dataCenterList) {
|
||||||
Long dcId = dc.getId();
|
if (!migrateAgentsInZone(dc, fromMsUuid, fromMsId, avoidMsList, lbAlgorithm, lbAlgorithmChanged,
|
||||||
List<Long> orderedHostIdList = getOrderedHostIdList(dcId);
|
migrationStartTimeInMs, timeoutDurationInMs)) {
|
||||||
List<Host> agentBasedHostsOfMsInDc = getAllAgentBasedHostsInDc(fromMsId, dcId);
|
return false;
|
||||||
if (CollectionUtils.isEmpty(agentBasedHostsOfMsInDc)) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
logger.debug(String.format("Migrating %d indirect agents from management server node %d (id: %s) of zone %s", agentBasedHostsOfMsInDc.size(), fromMsId, fromMsUuid, dc));
|
|
||||||
for (final Host host : agentBasedHostsOfMsInDc) {
|
|
||||||
long migrationElapsedTimeInMs = System.currentTimeMillis() - migrationStartTime;
|
|
||||||
if (migrationElapsedTimeInMs >= timeoutDurationInMs) {
|
|
||||||
logger.debug(String.format("Stop migrating remaining indirect agents from management server node %d (id: %s), timed out", fromMsId, fromMsUuid));
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
|
|
||||||
List<String> msList = null;
|
|
||||||
Long lbCheckInterval = 0L;
|
|
||||||
if (lbAlgorithmChanged) {
|
|
||||||
// send new MS list when there is change in lb algorithm
|
|
||||||
msList = getManagementServerList(host.getId(), dcId, orderedHostIdList, lbAlgorithm);
|
|
||||||
lbCheckInterval = getLBPreferredHostCheckInterval(host.getClusterId());
|
|
||||||
}
|
|
||||||
|
|
||||||
final MigrateAgentConnectionCommand cmd = new MigrateAgentConnectionCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval);
|
|
||||||
agentManager.easySend(host.getId(), cmd); //answer not received as the agent disconnects and reconnects to other ms
|
|
||||||
updateLastManagementServer(host.getId(), fromMsId);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private boolean migrateAgentsInZone(DataCenterVO dc, String fromMsUuid, long fromMsId, List<String> avoidMsList,
|
||||||
|
String lbAlgorithm, boolean lbAlgorithmChanged, long migrationStartTimeInMs, long timeoutDurationInMs) {
|
||||||
|
List<Long> orderedHostIdList = getOrderedHostIdList(dc.getId());
|
||||||
|
if (!migrateNonRoutingHostAgentsInZone(fromMsUuid, fromMsId, dc, migrationStartTimeInMs,
|
||||||
|
timeoutDurationInMs, avoidMsList, lbAlgorithm, lbAlgorithmChanged, orderedHostIdList)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
List<Long> clusterIds = clusterDao.listAllClusterIds(dc.getId());
|
||||||
|
for (Long clusterId : clusterIds) {
|
||||||
|
if (!migrateRoutingHostAgentsInCluster(clusterId, fromMsUuid, fromMsId, dc, migrationStartTimeInMs,
|
||||||
|
timeoutDurationInMs, avoidMsList, lbAlgorithm, lbAlgorithmChanged, orderedHostIdList)) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
private final class MigrateAgentConnectionTask extends ManagedContextRunnable {
|
||||||
|
private long fromMsId;
|
||||||
|
Long hostId;
|
||||||
|
Long dcId;
|
||||||
|
List<Long> orderedHostIdList;
|
||||||
|
List<String> avoidMsList;
|
||||||
|
Long lbCheckInterval;
|
||||||
|
String lbAlgorithm;
|
||||||
|
boolean lbAlgorithmChanged;
|
||||||
|
|
||||||
|
public MigrateAgentConnectionTask(long fromMsId, Long hostId, Long dcId, List<Long> orderedHostIdList,
|
||||||
|
List<String> avoidMsList, Long lbCheckInterval, String lbAlgorithm, boolean lbAlgorithmChanged) {
|
||||||
|
this.fromMsId = fromMsId;
|
||||||
|
this.hostId = hostId;
|
||||||
|
this.orderedHostIdList = orderedHostIdList;
|
||||||
|
this.avoidMsList = avoidMsList;
|
||||||
|
this.lbCheckInterval = lbCheckInterval;
|
||||||
|
this.lbAlgorithm = lbAlgorithm;
|
||||||
|
this.lbAlgorithmChanged = lbAlgorithmChanged;
|
||||||
|
}
|
||||||
|
|
||||||
|
@Override
|
||||||
|
protected void runInContext() {
|
||||||
|
try {
|
||||||
|
List<String> msList = null;
|
||||||
|
if (lbAlgorithmChanged) {
|
||||||
|
// send new MS list when there is change in lb algorithm
|
||||||
|
msList = getManagementServerList(hostId, dcId, orderedHostIdList, lbAlgorithm);
|
||||||
|
}
|
||||||
|
|
||||||
|
final MigrateAgentConnectionCommand cmd = new MigrateAgentConnectionCommand(msList, avoidMsList, lbAlgorithm, lbCheckInterval);
|
||||||
|
cmd.setWait(60);
|
||||||
|
final Answer answer = agentManager.easySend(hostId, cmd); //may not receive answer when the agent disconnects immediately and try reconnecting to other ms host
|
||||||
|
if (answer != null && !answer.getResult()) {
|
||||||
|
logger.warn(String.format("Error while initiating migration of agent connection for host agent ID: %d - %s", hostId, answer.getDetails()));
|
||||||
|
}
|
||||||
|
updateLastManagementServer(hostId, fromMsId);
|
||||||
|
} catch (final Exception e) {
|
||||||
|
logger.error(String.format("Error migrating agent connection for host %d", hostId), e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
private void updateLastManagementServer(long hostId, long msId) {
|
private void updateLastManagementServer(long hostId, long msId) {
|
||||||
HostVO hostVO = hostDao.findById(hostId);
|
HostVO hostVO = hostDao.findById(hostId);
|
||||||
if (hostVO != null) {
|
if (hostVO != null) {
|
||||||
|
|
|
||||||
|
|
@ -106,7 +106,7 @@ public class IndirectAgentLBServiceImplTest {
|
||||||
|
|
||||||
List<Long> hostIds = hosts.stream().map(HostVO::getId).collect(Collectors.toList());
|
List<Long> hostIds = hosts.stream().map(HostVO::getId).collect(Collectors.toList());
|
||||||
doReturn(hostIds).when(hostDao).findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(Mockito.anyLong(),
|
doReturn(hostIds).when(hostDao).findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(Mockito.anyLong(),
|
||||||
Mockito.eq(null), Mockito.anyList(), Mockito.anyList(), Mockito.anyList());
|
Mockito.eq(null), Mockito.eq(null), Mockito.anyList(), Mockito.anyList(), Mockito.anyList());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Before
|
@Before
|
||||||
|
|
@ -203,14 +203,14 @@ public class IndirectAgentLBServiceImplTest {
|
||||||
@Test
|
@Test
|
||||||
public void testGetOrderedRunningHostIdsEmptyList() {
|
public void testGetOrderedRunningHostIdsEmptyList() {
|
||||||
doReturn(Collections.emptyList()).when(hostDao).findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(
|
doReturn(Collections.emptyList()).when(hostDao).findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(
|
||||||
Mockito.eq(DC_1_ID), Mockito.eq(null), Mockito.anyList(), Mockito.anyList(), Mockito.anyList());
|
Mockito.eq(DC_1_ID), Mockito.eq(null), Mockito.eq(null), Mockito.anyList(), Mockito.anyList(), Mockito.anyList());
|
||||||
Assert.assertTrue(agentMSLB.getOrderedHostIdList(DC_1_ID).isEmpty());
|
Assert.assertTrue(agentMSLB.getOrderedHostIdList(DC_1_ID).isEmpty());
|
||||||
}
|
}
|
||||||
|
|
||||||
@Test
|
@Test
|
||||||
public void testGetOrderedRunningHostIdsOrderList() {
|
public void testGetOrderedRunningHostIdsOrderList() {
|
||||||
doReturn(Arrays.asList(host4.getId(), host2.getId(), host1.getId(), host3.getId())).when(hostDao)
|
doReturn(Arrays.asList(host4.getId(), host2.getId(), host1.getId(), host3.getId())).when(hostDao)
|
||||||
.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(Mockito.eq(DC_1_ID), Mockito.eq(null),
|
.findHostIdsByZoneClusterResourceStateTypeAndHypervisorType(Mockito.eq(DC_1_ID), Mockito.eq(null), Mockito.eq(null),
|
||||||
Mockito.anyList(), Mockito.anyList(), Mockito.anyList());
|
Mockito.anyList(), Mockito.anyList(), Mockito.anyList());
|
||||||
Assert.assertEquals(Arrays.asList(host1.getId(), host2.getId(), host3.getId(), host4.getId()),
|
Assert.assertEquals(Arrays.asList(host1.getId(), host2.getId(), host3.getId(), host4.getId()),
|
||||||
agentMSLB.getOrderedHostIdList(DC_1_ID));
|
agentMSLB.getOrderedHostIdList(DC_1_ID));
|
||||||
|
|
|
||||||
|
|
@ -25,7 +25,7 @@ import java.net.SocketAddress;
|
||||||
* WorkerFactory creates and selects workers.
|
* WorkerFactory creates and selects workers.
|
||||||
*/
|
*/
|
||||||
public interface HandlerFactory {
|
public interface HandlerFactory {
|
||||||
public Task create(Task.Type type, Link link, byte[] data);
|
Task create(Task.Type type, Link link, byte[] data);
|
||||||
default int getMaxConcurrentNewConnectionsCount() {
|
default int getMaxConcurrentNewConnectionsCount() {
|
||||||
return 0;
|
return 0;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -617,8 +617,8 @@ public class Link {
|
||||||
final long timeTaken = System.currentTimeMillis() - startTimeMills;
|
final long timeTaken = System.currentTimeMillis() - startTimeMills;
|
||||||
|
|
||||||
if (timeTaken > timeoutMillis) {
|
if (timeTaken > timeoutMillis) {
|
||||||
LOGGER.warn("SSL Handshake has taken more than {}ms to connect to: {}" +
|
LOGGER.warn("SSL Handshake has taken more than {} ms to connect to: {}" +
|
||||||
" while status: {}. Please investigate this connection.", socketChannel.getRemoteAddress(),
|
" while status: {}. Please investigate this connection.", timeoutMillis, socketChannel.getRemoteAddress(),
|
||||||
handshakeStatus);
|
handshakeStatus);
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -115,4 +115,8 @@ public class NioClient extends NioConnection {
|
||||||
}
|
}
|
||||||
logger.info("NioClient connection closed");
|
logger.info("NioClient connection closed");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public String getHost() {
|
||||||
|
return host;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -83,26 +83,19 @@ public abstract class NioConnection implements Callable<Boolean> {
|
||||||
protected Set<SocketChannel> socketChannels = new HashSet<>();
|
protected Set<SocketChannel> socketChannels = new HashSet<>();
|
||||||
protected Integer sslHandshakeTimeout = null;
|
protected Integer sslHandshakeTimeout = null;
|
||||||
private final int factoryMaxNewConnectionsCount;
|
private final int factoryMaxNewConnectionsCount;
|
||||||
|
protected boolean blockNewConnections;
|
||||||
|
|
||||||
public NioConnection(final String name, final int port, final int workers, final HandlerFactory factory) {
|
public NioConnection(final String name, final int port, final int workers, final HandlerFactory factory) {
|
||||||
_name = name;
|
_name = name;
|
||||||
_isRunning = false;
|
_isRunning = false;
|
||||||
|
blockNewConnections = false;
|
||||||
_selector = null;
|
_selector = null;
|
||||||
_port = port;
|
_port = port;
|
||||||
_workers = workers;
|
_workers = workers;
|
||||||
_factory = factory;
|
_factory = factory;
|
||||||
this.factoryMaxNewConnectionsCount = factory.getMaxConcurrentNewConnectionsCount();
|
this.factoryMaxNewConnectionsCount = factory.getMaxConcurrentNewConnectionsCount();
|
||||||
_executor = new ThreadPoolExecutor(workers, 5 * workers, 1, TimeUnit.DAYS,
|
initWorkersExecutor();
|
||||||
new LinkedBlockingQueue<>(5 * workers), new NamedThreadFactory(name + "-Handler"),
|
initSSLHandshakeExecutor();
|
||||||
new ThreadPoolExecutor.AbortPolicy());
|
|
||||||
String sslHandshakeHandlerName = name + "-SSLHandshakeHandler";
|
|
||||||
if (factoryMaxNewConnectionsCount > 0) {
|
|
||||||
_sslHandshakeExecutor = new ThreadPoolExecutor(0, this.factoryMaxNewConnectionsCount, 30,
|
|
||||||
TimeUnit.MINUTES, new SynchronousQueue<>(), new NamedThreadFactory(sslHandshakeHandlerName),
|
|
||||||
new ThreadPoolExecutor.AbortPolicy());
|
|
||||||
} else {
|
|
||||||
_sslHandshakeExecutor = Executors.newCachedThreadPool(new NamedThreadFactory(sslHandshakeHandlerName));
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
public void setCAService(final CAService caService) {
|
public void setCAService(final CAService caService) {
|
||||||
|
|
@ -127,10 +120,14 @@ public abstract class NioConnection implements Callable<Boolean> {
|
||||||
_isStartup = true;
|
_isStartup = true;
|
||||||
|
|
||||||
if (_executor.isShutdown()) {
|
if (_executor.isShutdown()) {
|
||||||
_executor = new ThreadPoolExecutor(_workers, 5 * _workers, 1, TimeUnit.DAYS, new LinkedBlockingQueue<>(), new NamedThreadFactory(_name + "-Handler"));
|
initWorkersExecutor();
|
||||||
|
}
|
||||||
|
if (_sslHandshakeExecutor.isShutdown()) {
|
||||||
|
initSSLHandshakeExecutor();
|
||||||
}
|
}
|
||||||
_threadExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory(this._name + "-NioConnectionHandler"));
|
_threadExecutor = Executors.newSingleThreadExecutor(new NamedThreadFactory(this._name + "-NioConnectionHandler"));
|
||||||
_isRunning = true;
|
_isRunning = true;
|
||||||
|
blockNewConnections = false;
|
||||||
_futureTask = _threadExecutor.submit(this);
|
_futureTask = _threadExecutor.submit(this);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -138,12 +135,30 @@ public abstract class NioConnection implements Callable<Boolean> {
|
||||||
_executor.shutdown();
|
_executor.shutdown();
|
||||||
_sslHandshakeExecutor.shutdown();
|
_sslHandshakeExecutor.shutdown();
|
||||||
_isRunning = false;
|
_isRunning = false;
|
||||||
|
blockNewConnections = true;
|
||||||
if (_threadExecutor != null) {
|
if (_threadExecutor != null) {
|
||||||
_futureTask.cancel(false);
|
_futureTask.cancel(false);
|
||||||
_threadExecutor.shutdown();
|
_threadExecutor.shutdown();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
private void initWorkersExecutor() {
|
||||||
|
_executor = new ThreadPoolExecutor(_workers, 5 * _workers, 1, TimeUnit.DAYS,
|
||||||
|
new LinkedBlockingQueue<>(5 * _workers), new NamedThreadFactory(_name + "-Handler"),
|
||||||
|
new ThreadPoolExecutor.AbortPolicy());
|
||||||
|
}
|
||||||
|
|
||||||
|
private void initSSLHandshakeExecutor() {
|
||||||
|
String sslHandshakeHandlerName = _name + "-SSLHandshakeHandler";
|
||||||
|
if (factoryMaxNewConnectionsCount > 0) {
|
||||||
|
_sslHandshakeExecutor = new ThreadPoolExecutor(0, this.factoryMaxNewConnectionsCount, 30,
|
||||||
|
TimeUnit.MINUTES, new SynchronousQueue<>(), new NamedThreadFactory(sslHandshakeHandlerName),
|
||||||
|
new ThreadPoolExecutor.AbortPolicy());
|
||||||
|
} else {
|
||||||
|
_sslHandshakeExecutor = Executors.newCachedThreadPool(new NamedThreadFactory(sslHandshakeHandlerName));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
public boolean isRunning() {
|
public boolean isRunning() {
|
||||||
return !_futureTask.isDone();
|
return !_futureTask.isDone();
|
||||||
}
|
}
|
||||||
|
|
@ -210,6 +225,16 @@ public abstract class NioConnection implements Callable<Boolean> {
|
||||||
|
|
||||||
abstract void unregisterLink(InetSocketAddress saddr);
|
abstract void unregisterLink(InetSocketAddress saddr);
|
||||||
|
|
||||||
|
protected boolean rejectConnectionIfBlocked(final SocketChannel socketChannel) throws IOException {
|
||||||
|
if (!blockNewConnections) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
logger.warn("Rejecting new connection as the server is blocked from accepting new connections");
|
||||||
|
socketChannel.close();
|
||||||
|
_selector.wakeup();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
protected boolean rejectConnectionIfBusy(final SocketChannel socketChannel) throws IOException {
|
protected boolean rejectConnectionIfBusy(final SocketChannel socketChannel) throws IOException {
|
||||||
if (factoryMaxNewConnectionsCount <= 0 || _factory.getNewConnectionsCount() < factoryMaxNewConnectionsCount) {
|
if (factoryMaxNewConnectionsCount <= 0 || _factory.getNewConnectionsCount() < factoryMaxNewConnectionsCount) {
|
||||||
return false;
|
return false;
|
||||||
|
|
@ -226,7 +251,7 @@ public abstract class NioConnection implements Callable<Boolean> {
|
||||||
protected void accept(final SelectionKey key) throws IOException {
|
protected void accept(final SelectionKey key) throws IOException {
|
||||||
final ServerSocketChannel serverSocketChannel = (ServerSocketChannel)key.channel();
|
final ServerSocketChannel serverSocketChannel = (ServerSocketChannel)key.channel();
|
||||||
final SocketChannel socketChannel = serverSocketChannel.accept();
|
final SocketChannel socketChannel = serverSocketChannel.accept();
|
||||||
if (rejectConnectionIfBusy(socketChannel)) {
|
if (rejectConnectionIfBlocked(socketChannel) || rejectConnectionIfBusy(socketChannel)) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
socketChannel.configureBlocking(false);
|
socketChannel.configureBlocking(false);
|
||||||
|
|
@ -520,6 +545,14 @@ public abstract class NioConnection implements Callable<Boolean> {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
public void block() {
|
||||||
|
blockNewConnections = true;
|
||||||
|
}
|
||||||
|
|
||||||
|
public void unblock() {
|
||||||
|
blockNewConnections = false;
|
||||||
|
}
|
||||||
|
|
||||||
public class ChangeRequest {
|
public class ChangeRequest {
|
||||||
public static final int REGISTER = 1;
|
public static final int REGISTER = 1;
|
||||||
public static final int CHANGEOPS = 2;
|
public static final int CHANGEOPS = 2;
|
||||||
|
|
|
||||||
Loading…
Reference in New Issue