CLOUDSTACK-10136: Fix RemoteHostEndPoint thread growth

This fixes the following:
- Unchecked thread growth in RemoteEndHostEndPoint
- Potential NPE while finding EP for a storage/scope

Unbounded thread growth can be reproduced with following findings:
- Every unreachable template would produce 6 new threads (in a single
ScheduledExecutorService instance) spaced by 10 seconds
- Every reachable template url without the template would produce 1 new
thread (and one ScheduledExecutorService instance), it errors out quickly without
causing more thread growth.
- Every valid url will produce upto 10 threads as the same ep (endpoint
instance) will be reused to query upload/download (async callback)
progresses.

Every RemoteHostEndPoint instances creates its own
ScheduledExecutorService instance which is why in the jstack dump, we
see several threads that share the prefix RemoteHostEndPoint-{1..10}
(given poolsize is defined as 10, it uses suffixes 1-10).

This fixes the discovered thread leakage with following notes:
- Instead of ScheduledExecutorService instance, a cached pool could be
used instead and was implemented, and with `static` scope to be reused
among other future RemoteHostEndPoint instances.
- It was not clear why we would want to wait when we've Answers returned
from the remote EP, and therefore a scheduled/delayed Runnable was
not required at all for processing answers. ScheduledExecutorService
was therefore not really required, moved to ExecutorService instead.
- Another benefit of using a cached pool is that it will shutdown
threads if they are not used in 60 seconds, and they get re-used for
future runnable submissions.
- Caveat: the executor service is still unbounded, however, the use-case
that this method is used for short jobs to check upload/download
progresses fits the case here.
- Refactored CmdRunner to not use/reference objects from parent class.

Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
This commit is contained in:
Rohit Yadav 2017-11-08 16:50:46 +05:30 committed by Rohit Yadav
parent 9c067c0496
commit 3ee8d83621
2 changed files with 24 additions and 21 deletions

View File

@ -18,17 +18,15 @@
*/
package org.apache.cloudstack.storage;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import javax.inject.Inject;
import org.apache.log4j.Logger;
import org.apache.cloudstack.engine.subsystem.api.storage.EndPoint;
import org.apache.cloudstack.framework.async.AsyncCompletionCallback;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.log4j.Logger;
import com.cloud.agent.AgentManager;
import com.cloud.agent.Listener;
@ -54,9 +52,11 @@ import com.cloud.vm.dao.SecondaryStorageVmDao;
public class RemoteHostEndPoint implements EndPoint {
private static final Logger s_logger = Logger.getLogger(RemoteHostEndPoint.class);
private long hostId;
private String hostAddress;
private String publicAddress;
@Inject
AgentManager agentMgr;
@Inject
@ -65,10 +65,10 @@ public class RemoteHostEndPoint implements EndPoint {
protected SecondaryStorageVmDao vmDao;
@Inject
protected HostDao _hostDao;
private ScheduledExecutorService executor;
private static ExecutorService executorService = Executors.newCachedThreadPool(new NamedThreadFactory("RemoteHostEndPoint"));
public RemoteHostEndPoint() {
executor = Executors.newScheduledThreadPool(10, new NamedThreadFactory("RemoteHostEndPoint"));
}
private void configure(Host host) {
@ -134,17 +134,17 @@ public class RemoteHostEndPoint implements EndPoint {
}
private class CmdRunner extends ManagedContextRunnable implements Listener {
final AsyncCompletionCallback<Answer> callback;
Answer answer;
private final AsyncCompletionCallback<Answer> callback;
private Answer answer;
public CmdRunner(AsyncCompletionCallback<Answer> callback) {
CmdRunner(final AsyncCompletionCallback<Answer> callback) {
this.callback = callback;
}
@Override
public boolean processAnswers(long agentId, long seq, Answer[] answers) {
answer = answers[0];
executor.schedule(this, 10, TimeUnit.SECONDS);
this.answer = answers[0];
RemoteHostEndPoint.executorService.submit(this);
return true;
}
@ -204,7 +204,7 @@ public class RemoteHostEndPoint implements EndPoint {
@Override
protected void runInContext() {
callback.complete(answer);
this.callback.complete(this.answer);
}
}

View File

@ -106,16 +106,19 @@ public class DefaultEndPointSelector implements EndPointSelector {
StringBuilder sbuilder = new StringBuilder();
sbuilder.append(sqlBase);
if (scope.getScopeType() == ScopeType.HOST) {
sbuilder.append(" and h.id = ");
sbuilder.append(scope.getScopeId());
} else if (scope.getScopeType() == ScopeType.CLUSTER) {
sbuilder.append(" and h.cluster_id = ");
sbuilder.append(scope.getScopeId());
} else if (scope.getScopeType() == ScopeType.ZONE) {
sbuilder.append(" and h.data_center_id = ");
sbuilder.append(scope.getScopeId());
if (scope != null) {
if (scope.getScopeType() == ScopeType.HOST) {
sbuilder.append(" and h.id = ");
sbuilder.append(scope.getScopeId());
} else if (scope.getScopeType() == ScopeType.CLUSTER) {
sbuilder.append(" and h.cluster_id = ");
sbuilder.append(scope.getScopeId());
} else if (scope.getScopeType() == ScopeType.ZONE) {
sbuilder.append(" and h.data_center_id = ");
sbuilder.append(scope.getScopeId());
}
}
// TODO: order by rand() is slow if there are lot of hosts
sbuilder.append(") t where t.value<>'true' or t.value is null"); //Added for exclude cluster's subquery
sbuilder.append(" ORDER by rand() limit 1");