CLOUDSTACK-6047: Fix timeout issue when try to execute aggregated commands

Add executeInVR() with timeout interface to VirtualRouterDeployer

AggregationControlCommand with Action.Finish may take longer than normal command
since it would execute all the commands in one execution, and it may result in
SSH timeout for SshHelper or other mechanism communicate with VR.

Introduce an new executeInVR() interface with added timeout period for waiting
FinishAggregationCommand to complete execution.
This commit is contained in:
Sheng Yang 2014-03-18 11:26:04 -07:00
parent b4d8332a69
commit d56b45a1c3
7 changed files with 110 additions and 78 deletions

View File

@ -21,6 +21,7 @@ import com.cloud.utils.ExecutionResult;
public interface VirtualRouterDeployer {
ExecutionResult executeInVR(String routerIp, String script, String args);
ExecutionResult executeInVR(String routerIp, String script, String args, int timeout);
ExecutionResult createFileInVR(String routerIp, String path, String filename, String content);
ExecutionResult prepareCommand(NetworkElementCommand cmd);
ExecutionResult cleanupCommand(NetworkElementCommand cmd);

View File

@ -1137,11 +1137,13 @@ public class VirtualRoutingResource {
return new Answer(cmd);
} else if (action == Action.Finish) {
Queue<NetworkElementCommand> queue = _vrAggregateCommandsSet.get(routerName);
int answerCounts = 0;
try {
StringBuilder sb = new StringBuilder();
sb.append("#Apache CloudStack Virtual Router Config File\n");
sb.append("<version>\n" + _cfgVersion + "\n</version>\n");
for (NetworkElementCommand command : queue) {
answerCounts += command.getAnswersCount();
List<ConfigItem> cfg = generateCommandCfg(command);
if (cfg == null) {
s_logger.warn("Unknown commands for VirtualRoutingResource, but continue: " + cmd.toString());
@ -1168,7 +1170,12 @@ public class VirtualRoutingResource {
return new Answer(cmd, false, result.getDetails());
}
result = _vrDeployer.executeInVR(cmd.getRouterAccessIp(), VRScripts.VR_CFG, "-c " + cfgFilePath + cfgFileName);
// 3 second for each answer should be enough, and 120s is the minimal timeout
int timeout = answerCounts * 3;
if (timeout < 120) {
timeout = 120;
}
result = _vrDeployer.executeInVR(cmd.getRouterAccessIp(), VRScripts.VR_CFG, "-c " + cfgFilePath + cfgFileName, timeout);
if (!result.isSuccess()) {
return new Answer(cmd, false, result.getDetails());
}

View File

@ -91,6 +91,11 @@ public class VirtualRoutingResourceTest implements VirtualRouterDeployer {
@Override
public ExecutionResult executeInVR(String routerIp, String script, String args) {
return executeInVR(routerIp, script, args, 60);
}
@Override
public ExecutionResult executeInVR(String routerIp, String script, String args, int timeout) {
assertEquals(routerIp, ROUTERIP);
verifyCommand(_currentCmd, script, args);
return new ExecutionResult(true, null);

View File

@ -574,6 +574,11 @@ public class HypervDirectConnectResource extends ServerResourceBase implements S
@Override
public ExecutionResult executeInVR(String routerIP, String script, String args) {
return executeInVR(routerIP, script, args, 120);
}
@Override
public ExecutionResult executeInVR(String routerIP, String script, String args, int timeout) {
Pair<Boolean, String> result;
//TODO: Password should be masked, cannot output to log directly
@ -582,7 +587,8 @@ public class HypervDirectConnectResource extends ServerResourceBase implements S
}
try {
result = SshHelper.sshExecute(routerIP, DEFAULT_DOMR_SSHPORT, "root", getSystemVMKeyFile(), null, "/opt/cloud/bin/" + script + " " + args);
result = SshHelper.sshExecute(routerIP, DEFAULT_DOMR_SSHPORT, "root", getSystemVMKeyFile(), null, "/opt/cloud/bin/" + script + " " + args,
60000, 60000, timeout * 1000);
} catch (Exception e) {
String msg = "Command failed due to " + e ;
s_logger.error(msg);

View File

@ -331,7 +331,12 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
@Override
public ExecutionResult executeInVR(String routerIp, String script, String args) {
final Script command = new Script(_routerProxyPath, _timeout, s_logger);
return executeInVR(routerIp, script, args, _timeout);
}
@Override
public ExecutionResult executeInVR(String routerIp, String script, String args, int timeout) {
final Script command = new Script(_routerProxyPath, timeout, s_logger);
final AllLinesParser parser = new AllLinesParser();
command.add(script);
command.add(routerIp);

View File

@ -1247,6 +1247,11 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
@Override
public ExecutionResult executeInVR(String routerIP, String script, String args) {
return executeInVR(routerIP, script, args, 120);
}
@Override
public ExecutionResult executeInVR(String routerIP, String script, String args, int timeout) {
Pair<Boolean, String> result;
//TODO: Password should be masked, cannot output to log directly
@ -1256,7 +1261,8 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
try {
VmwareManager mgr = getServiceContext().getStockObject(VmwareManager.CONTEXT_STOCK_NAME);
result = SshHelper.sshExecute(routerIP, DefaultDomRSshPort, "root", mgr.getSystemVMKeyFile(), null, "/opt/cloud/bin/" + script + " " + args);
result = SshHelper.sshExecute(routerIP, DefaultDomRSshPort, "root", mgr.getSystemVMKeyFile(), null, "/opt/cloud/bin/" + script + " " + args,
60000, 60000, timeout * 1000);
} catch (Exception e) {
String msg = "Command failed due to " + VmwareHelper.getExceptionMessage(e);
s_logger.error(msg);

View File

@ -16,77 +16,6 @@
// under the License.
package com.cloud.hypervisor.xen.resource;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Queue;
import java.util.Random;
import java.util.Set;
import java.util.UUID;
import javax.ejb.Local;
import javax.naming.ConfigurationException;
import javax.xml.parsers.DocumentBuilderFactory;
import org.apache.log4j.Logger;
import org.apache.xmlrpc.XmlRpcException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import com.trilead.ssh2.SCPClient;
import com.xensource.xenapi.Bond;
import com.xensource.xenapi.Connection;
import com.xensource.xenapi.Console;
import com.xensource.xenapi.GPUGroup;
import com.xensource.xenapi.Host;
import com.xensource.xenapi.HostCpu;
import com.xensource.xenapi.HostMetrics;
import com.xensource.xenapi.Network;
import com.xensource.xenapi.PBD;
import com.xensource.xenapi.PGPU;
import com.xensource.xenapi.PIF;
import com.xensource.xenapi.Pool;
import com.xensource.xenapi.SR;
import com.xensource.xenapi.Session;
import com.xensource.xenapi.Task;
import com.xensource.xenapi.Types;
import com.xensource.xenapi.Types.BadServerResponse;
import com.xensource.xenapi.Types.VmPowerState;
import com.xensource.xenapi.Types.XenAPIException;
import com.xensource.xenapi.VBD;
import com.xensource.xenapi.VBDMetrics;
import com.xensource.xenapi.VDI;
import com.xensource.xenapi.VGPU;
import com.xensource.xenapi.VGPUType;
import com.xensource.xenapi.VIF;
import com.xensource.xenapi.VLAN;
import com.xensource.xenapi.VM;
import com.xensource.xenapi.VMGuestMetrics;
import com.xensource.xenapi.XenAPIObject;
import org.apache.cloudstack.storage.command.StorageSubSystemCommand;
import org.apache.cloudstack.storage.to.TemplateObjectTO;
import org.apache.cloudstack.storage.to.VolumeObjectTO;
import com.cloud.agent.IAgentControl;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.AttachIsoCommand;
@ -103,8 +32,8 @@ import com.cloud.agent.api.CheckVirtualMachineCommand;
import com.cloud.agent.api.CleanupNetworkRulesCmd;
import com.cloud.agent.api.ClusterSyncAnswer;
import com.cloud.agent.api.ClusterSyncCommand;
import com.cloud.agent.api.ClusterVMMetaDataSyncCommand;
import com.cloud.agent.api.ClusterVMMetaDataSyncAnswer;
import com.cloud.agent.api.ClusterVMMetaDataSyncCommand;
import com.cloud.agent.api.Command;
import com.cloud.agent.api.CreateStoragePoolCommand;
import com.cloud.agent.api.CreateVMSnapshotAnswer;
@ -249,6 +178,73 @@ import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachine.PowerState;
import com.cloud.vm.VirtualMachine.State;
import com.cloud.vm.snapshot.VMSnapshot;
import com.trilead.ssh2.SCPClient;
import com.xensource.xenapi.Bond;
import com.xensource.xenapi.Connection;
import com.xensource.xenapi.Console;
import com.xensource.xenapi.GPUGroup;
import com.xensource.xenapi.Host;
import com.xensource.xenapi.HostCpu;
import com.xensource.xenapi.HostMetrics;
import com.xensource.xenapi.Network;
import com.xensource.xenapi.PBD;
import com.xensource.xenapi.PGPU;
import com.xensource.xenapi.PIF;
import com.xensource.xenapi.Pool;
import com.xensource.xenapi.SR;
import com.xensource.xenapi.Session;
import com.xensource.xenapi.Task;
import com.xensource.xenapi.Types;
import com.xensource.xenapi.Types.BadServerResponse;
import com.xensource.xenapi.Types.VmPowerState;
import com.xensource.xenapi.Types.XenAPIException;
import com.xensource.xenapi.VBD;
import com.xensource.xenapi.VBDMetrics;
import com.xensource.xenapi.VDI;
import com.xensource.xenapi.VGPU;
import com.xensource.xenapi.VGPUType;
import com.xensource.xenapi.VIF;
import com.xensource.xenapi.VLAN;
import com.xensource.xenapi.VM;
import com.xensource.xenapi.VMGuestMetrics;
import com.xensource.xenapi.XenAPIObject;
import org.apache.cloudstack.storage.command.StorageSubSystemCommand;
import org.apache.cloudstack.storage.to.TemplateObjectTO;
import org.apache.cloudstack.storage.to.VolumeObjectTO;
import org.apache.log4j.Logger;
import org.apache.xmlrpc.XmlRpcException;
import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;
import org.xml.sax.InputSource;
import javax.ejb.Local;
import javax.naming.ConfigurationException;
import javax.xml.parsers.DocumentBuilderFactory;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.io.StringReader;
import java.net.URI;
import java.net.URISyntaxException;
import java.net.URL;
import java.net.URLConnection;
import java.util.ArrayList;
import java.util.Date;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.Properties;
import java.util.Queue;
import java.util.Random;
import java.util.Set;
import java.util.UUID;
/**
* CitrixResourceBase encapsulates the calls to the XenServer Xapi process
@ -558,13 +554,19 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
}
@Override
public ExecutionResult executeInVR(String routerIP, String script, String args) {
public ExecutionResult executeInVR(String routerIP, String script, String args, int timeout) {
Connection conn = getConnection();
String rc = callHostPlugin(conn, "vmops", "routerProxy", "args", script + " " + routerIP + " " + args);
String rc = callHostPluginAsync(conn, "vmops", "routerProxy", timeout, "args", script + " " + routerIP + " " + args);
// Fail case would be start with "fail#"
return new ExecutionResult(rc.startsWith("succ#"), rc.substring(5));
}
@Override
public ExecutionResult executeInVR(String routerIP, String script, String args) {
// Timeout is 120 seconds by default
return executeInVR(routerIP, script, args, 120);
}
@Override
public ExecutionResult createFileInVR(String routerIp, String path, String filename, String content) {
Connection conn = getConnection();