From 3ee8d83621c23f976413fdce6d9245197497d504 Mon Sep 17 00:00:00 2001 From: Rohit Yadav Date: Wed, 8 Nov 2017 16:50:46 +0530 Subject: [PATCH] CLOUDSTACK-10136: Fix RemoteHostEndPoint thread growth This fixes the following: - Unchecked thread growth in RemoteEndHostEndPoint - Potential NPE while finding EP for a storage/scope Unbounded thread growth can be reproduced with following findings: - Every unreachable template would produce 6 new threads (in a single ScheduledExecutorService instance) spaced by 10 seconds - Every reachable template url without the template would produce 1 new thread (and one ScheduledExecutorService instance), it errors out quickly without causing more thread growth. - Every valid url will produce upto 10 threads as the same ep (endpoint instance) will be reused to query upload/download (async callback) progresses. Every RemoteHostEndPoint instances creates its own ScheduledExecutorService instance which is why in the jstack dump, we see several threads that share the prefix RemoteHostEndPoint-{1..10} (given poolsize is defined as 10, it uses suffixes 1-10). This fixes the discovered thread leakage with following notes: - Instead of ScheduledExecutorService instance, a cached pool could be used instead and was implemented, and with `static` scope to be reused among other future RemoteHostEndPoint instances. - It was not clear why we would want to wait when we've Answers returned from the remote EP, and therefore a scheduled/delayed Runnable was not required at all for processing answers. ScheduledExecutorService was therefore not really required, moved to ExecutorService instead. - Another benefit of using a cached pool is that it will shutdown threads if they are not used in 60 seconds, and they get re-used for future runnable submissions. - Caveat: the executor service is still unbounded, however, the use-case that this method is used for short jobs to check upload/download progresses fits the case here. - Refactored CmdRunner to not use/reference objects from parent class. Signed-off-by: Rohit Yadav --- .../storage/RemoteHostEndPoint.java | 24 +++++++++---------- .../endpoint/DefaultEndPointSelector.java | 21 +++++++++------- 2 files changed, 24 insertions(+), 21 deletions(-) diff --git a/engine/storage/src/org/apache/cloudstack/storage/RemoteHostEndPoint.java b/engine/storage/src/org/apache/cloudstack/storage/RemoteHostEndPoint.java index 3bad62eb574..b438bc121e4 100644 --- a/engine/storage/src/org/apache/cloudstack/storage/RemoteHostEndPoint.java +++ b/engine/storage/src/org/apache/cloudstack/storage/RemoteHostEndPoint.java @@ -18,17 +18,15 @@ */ package org.apache.cloudstack.storage; +import java.util.concurrent.ExecutorService; import java.util.concurrent.Executors; -import java.util.concurrent.ScheduledExecutorService; -import java.util.concurrent.TimeUnit; import javax.inject.Inject; -import org.apache.log4j.Logger; - import org.apache.cloudstack.engine.subsystem.api.storage.EndPoint; import org.apache.cloudstack.framework.async.AsyncCompletionCallback; import org.apache.cloudstack.managed.context.ManagedContextRunnable; +import org.apache.log4j.Logger; import com.cloud.agent.AgentManager; import com.cloud.agent.Listener; @@ -54,9 +52,11 @@ import com.cloud.vm.dao.SecondaryStorageVmDao; public class RemoteHostEndPoint implements EndPoint { private static final Logger s_logger = Logger.getLogger(RemoteHostEndPoint.class); + private long hostId; private String hostAddress; private String publicAddress; + @Inject AgentManager agentMgr; @Inject @@ -65,10 +65,10 @@ public class RemoteHostEndPoint implements EndPoint { protected SecondaryStorageVmDao vmDao; @Inject protected HostDao _hostDao; - private ScheduledExecutorService executor; + + private static ExecutorService executorService = Executors.newCachedThreadPool(new NamedThreadFactory("RemoteHostEndPoint")); public RemoteHostEndPoint() { - executor = Executors.newScheduledThreadPool(10, new NamedThreadFactory("RemoteHostEndPoint")); } private void configure(Host host) { @@ -134,17 +134,17 @@ public class RemoteHostEndPoint implements EndPoint { } private class CmdRunner extends ManagedContextRunnable implements Listener { - final AsyncCompletionCallback callback; - Answer answer; + private final AsyncCompletionCallback callback; + private Answer answer; - public CmdRunner(AsyncCompletionCallback callback) { + CmdRunner(final AsyncCompletionCallback callback) { this.callback = callback; } @Override public boolean processAnswers(long agentId, long seq, Answer[] answers) { - answer = answers[0]; - executor.schedule(this, 10, TimeUnit.SECONDS); + this.answer = answers[0]; + RemoteHostEndPoint.executorService.submit(this); return true; } @@ -204,7 +204,7 @@ public class RemoteHostEndPoint implements EndPoint { @Override protected void runInContext() { - callback.complete(answer); + this.callback.complete(this.answer); } } diff --git a/engine/storage/src/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java b/engine/storage/src/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java index e414b6c4c35..c25c1adaf7a 100644 --- a/engine/storage/src/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java +++ b/engine/storage/src/org/apache/cloudstack/storage/endpoint/DefaultEndPointSelector.java @@ -106,16 +106,19 @@ public class DefaultEndPointSelector implements EndPointSelector { StringBuilder sbuilder = new StringBuilder(); sbuilder.append(sqlBase); - if (scope.getScopeType() == ScopeType.HOST) { - sbuilder.append(" and h.id = "); - sbuilder.append(scope.getScopeId()); - } else if (scope.getScopeType() == ScopeType.CLUSTER) { - sbuilder.append(" and h.cluster_id = "); - sbuilder.append(scope.getScopeId()); - } else if (scope.getScopeType() == ScopeType.ZONE) { - sbuilder.append(" and h.data_center_id = "); - sbuilder.append(scope.getScopeId()); + if (scope != null) { + if (scope.getScopeType() == ScopeType.HOST) { + sbuilder.append(" and h.id = "); + sbuilder.append(scope.getScopeId()); + } else if (scope.getScopeType() == ScopeType.CLUSTER) { + sbuilder.append(" and h.cluster_id = "); + sbuilder.append(scope.getScopeId()); + } else if (scope.getScopeType() == ScopeType.ZONE) { + sbuilder.append(" and h.data_center_id = "); + sbuilder.append(scope.getScopeId()); + } } + // TODO: order by rand() is slow if there are lot of hosts sbuilder.append(") t where t.value<>'true' or t.value is null"); //Added for exclude cluster's subquery sbuilder.append(" ORDER by rand() limit 1");