From ecc71518a9a9fe1d7078a90dcbb4bbcc9bc2ca61 Mon Sep 17 00:00:00 2001 From: Harikrishna Patnala Date: Wed, 9 Apr 2014 17:57:16 +0530 Subject: [PATCH] CLOUDSTACK-6253: Optimizing VR alerts getting algorithm In addition to this a new configuration parameter is added router.alerts.check.interval defaulted to 30minutes to check for alerts in Virtual Router --- .../agent/api/GetRouterAlertsAnswer.java | 8 +- .../VirtualRoutingResource.java | 39 +++------ .../src/com/cloud/vm/dao/DomainRouterDao.java | 2 + .../com/cloud/vm/dao/DomainRouterDaoImpl.java | 18 ++++ .../agent/manager/SimulatorManagerImpl.java | 3 + .../VirtualNetworkApplianceManager.java | 4 + .../VirtualNetworkApplianceManagerImpl.java | 83 ++++++++++++++----- .../config/opt/cloud/bin/getRouterAlerts.sh | 45 ++++------ .../test_VirtualRouter_alerts.py | 4 +- 9 files changed, 121 insertions(+), 85 deletions(-) rename test/integration/{smoke => component}/test_VirtualRouter_alerts.py (97%) diff --git a/core/src/com/cloud/agent/api/GetRouterAlertsAnswer.java b/core/src/com/cloud/agent/api/GetRouterAlertsAnswer.java index 06a7a7a84eb..31485396f4d 100644 --- a/core/src/com/cloud/agent/api/GetRouterAlertsAnswer.java +++ b/core/src/com/cloud/agent/api/GetRouterAlertsAnswer.java @@ -22,7 +22,6 @@ import com.cloud.agent.api.routing.GetRouterAlertsCommand; public class GetRouterAlertsAnswer extends Answer { - String routerName; String[] alerts; String timeStamp; @@ -36,8 +35,8 @@ public class GetRouterAlertsAnswer extends Answer { } - public GetRouterAlertsAnswer(GetRouterAlertsCommand cmd, Exception ex) { - super(cmd, ex); + public GetRouterAlertsAnswer(GetRouterAlertsCommand cmd, String details) { + super(cmd, false, details); } public void setAlerts(String[] alerts) { @@ -56,7 +55,4 @@ public class GetRouterAlertsAnswer extends Answer { return timeStamp; } - public String getRouterName() { - return routerName; - } } diff --git a/core/src/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java 
b/core/src/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java index df4ed2cc7db..3b438533436 100755 --- a/core/src/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java +++ b/core/src/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java @@ -649,25 +649,23 @@ public class VirtualRoutingResource { private GetRouterAlertsAnswer execute(GetRouterAlertsCommand cmd) { - String args = null; String routerIp = cmd.getAccessDetail(NetworkElementCommand.ROUTER_IP); - if (cmd.getPreviousAlertTimeStamp() != null) { - args = cmd.getPreviousAlertTimeStamp(); - } + String args = cmd.getPreviousAlertTimeStamp(); ExecutionResult result = _vrDeployer.executeInVR(routerIp, VRScripts.ROUTER_ALERTS, args); String alerts[] = null; String lastAlertTimestamp = null; - // CallHostPlugin results "success" when there are no alerts on virtual router - if (result.isSuccess()) { - if (!result.getDetails().isEmpty() && !result.getDetails().equals("No Alerts")) { - alerts = result.getDetails().split("\\\\n"); - String[] lastAlert = alerts[alerts.length - 1].split(" "); - lastAlertTimestamp = lastAlert[0] + " " + lastAlert[1]; - } - } - return new GetRouterAlertsAnswer(cmd, alerts, lastAlertTimestamp); + if (result.isSuccess()) { + if (!result.getDetails().isEmpty() && !result.getDetails().trim().equals("No Alerts")) { + alerts = result.getDetails().trim().split("\\\\n"); + String[] lastAlert = alerts[alerts.length - 1].split(","); + lastAlertTimestamp = lastAlert[0]; + } + return new GetRouterAlertsAnswer(cmd, alerts, lastAlertTimestamp); + } else { + return new GetRouterAlertsAnswer(cmd, result.getDetails()); + } } protected Answer execute(CheckRouterCommand cmd) { @@ -760,21 +758,6 @@ public class VirtualRoutingResource { return cfg; } - protected List generateConfig(GetRouterAlertsCommand cmd) { - LinkedList cfg = new LinkedList<>(); - - String args = null; - String routerIp = cmd.getAccessDetail(NetworkElementCommand.ROUTER_IP); - if 
(cmd.getPreviousAlertTimeStamp() != null) { - args = "getRouterAlerts.sh " + routerIp + " " + cmd.getPreviousAlertTimeStamp(); - } else { - args = "getRouterAlerts.sh " + routerIp; - } - - cfg.add(new ConfigItem(VRScripts.ROUTER_ALERTS, args)); - return cfg; - } - protected List generateConfig(SetupGuestNetworkCommand cmd) { LinkedList cfg = new LinkedList<>(); diff --git a/engine/schema/src/com/cloud/vm/dao/DomainRouterDao.java b/engine/schema/src/com/cloud/vm/dao/DomainRouterDao.java index e3f75fabd7e..72ff8a91dd8 100755 --- a/engine/schema/src/com/cloud/vm/dao/DomainRouterDao.java +++ b/engine/schema/src/com/cloud/vm/dao/DomainRouterDao.java @@ -108,6 +108,8 @@ public interface DomainRouterDao extends GenericDao { */ List listByStateAndNetworkType(State state, Network.GuestType type, long mgmtSrvrId); + List listByStateAndManagementServer(State state, long mgmtSrvrId); + List findByNetworkOutsideThePod(long networkId, long podId, State state, Role role); List listByNetworkAndPodAndRole(long networkId, long podId, Role role); diff --git a/engine/schema/src/com/cloud/vm/dao/DomainRouterDaoImpl.java b/engine/schema/src/com/cloud/vm/dao/DomainRouterDaoImpl.java index 6b62f568739..ea919ac5364 100755 --- a/engine/schema/src/com/cloud/vm/dao/DomainRouterDaoImpl.java +++ b/engine/schema/src/com/cloud/vm/dao/DomainRouterDaoImpl.java @@ -60,6 +60,7 @@ public class DomainRouterDaoImpl extends GenericDaoBase im protected SearchBuilder StateNetworkTypeSearch; protected SearchBuilder OutsidePodSearch; protected SearchBuilder clusterSearch; + protected SearchBuilder SearchByStateAndManagementServerId; @Inject HostDao _hostsDao; @Inject @@ -130,6 +131,14 @@ public class DomainRouterDaoImpl extends GenericDaoBase im StateNetworkTypeSearch.join("host", joinHost, joinHost.entity().getId(), StateNetworkTypeSearch.entity().getHostId(), JoinType.INNER); StateNetworkTypeSearch.done(); + SearchByStateAndManagementServerId = createSearchBuilder(); + 
SearchByStateAndManagementServerId.and("state", SearchByStateAndManagementServerId.entity().getState(), Op.EQ); + + SearchBuilder joinHost2 = _hostsDao.createSearchBuilder(); + joinHost2.and("mgmtServerId", joinHost2.entity().getManagementServerId(), Op.EQ); + SearchByStateAndManagementServerId.join("host", joinHost2, joinHost2.entity().getId(), SearchByStateAndManagementServerId.entity().getHostId(), JoinType.INNER); + SearchByStateAndManagementServerId.done(); + OutsidePodSearch = createSearchBuilder(); SearchBuilder joinRouterNetwork2 = _routerNetworkDao.createSearchBuilder(); joinRouterNetwork2.and("networkId", joinRouterNetwork2.entity().getNetworkId(), Op.EQ); @@ -293,6 +302,15 @@ public class DomainRouterDaoImpl extends GenericDaoBase im return routers; } + @Override + public List listByStateAndManagementServer(State state, long mgmtSrvrId) { + SearchCriteria sc = SearchByStateAndManagementServerId.create(); + sc.setParameters("state", state); + sc.setJoinParameters("host", "mgmtServerId", mgmtSrvrId); + + return listBy(sc); + } + @Override public List findByNetworkOutsideThePod(long networkId, long podId, State state, Role role) { SearchCriteria sc = OutsidePodSearch.create(); diff --git a/plugins/hypervisors/simulator/src/com/cloud/agent/manager/SimulatorManagerImpl.java b/plugins/hypervisors/simulator/src/com/cloud/agent/manager/SimulatorManagerImpl.java index 23979c0f46b..0bb0c5c8f6c 100644 --- a/plugins/hypervisors/simulator/src/com/cloud/agent/manager/SimulatorManagerImpl.java +++ b/plugins/hypervisors/simulator/src/com/cloud/agent/manager/SimulatorManagerImpl.java @@ -86,6 +86,7 @@ import com.cloud.agent.api.proxy.CheckConsoleProxyLoadCommand; import com.cloud.agent.api.proxy.WatchConsoleProxyLoadCommand; import com.cloud.agent.api.routing.AggregationControlCommand; import com.cloud.agent.api.routing.DhcpEntryCommand; +import com.cloud.agent.api.routing.GetRouterAlertsCommand; import com.cloud.agent.api.routing.IpAssocCommand; import 
com.cloud.agent.api.routing.IpAssocVpcCommand; import com.cloud.agent.api.routing.LoadBalancerConfigCommand; @@ -370,6 +371,8 @@ public class SimulatorManagerImpl extends ManagerBase implements SimulatorManage return _mockNetworkMgr.setupPVLAN((PvlanSetupCommand)cmd); } else if (cmd instanceof StorageSubSystemCommand) { return this.storageHandler.handleStorageCommands((StorageSubSystemCommand)cmd); + } else if (cmd instanceof GetRouterAlertsCommand) { + return new Answer(cmd); } else if (cmd instanceof VpnUsersCfgCommand || cmd instanceof RemoteAccessVpnCfgCommand || cmd instanceof SetMonitorServiceCommand || cmd instanceof AggregationControlCommand) { return new Answer(cmd); } else { diff --git a/server/src/com/cloud/network/router/VirtualNetworkApplianceManager.java b/server/src/com/cloud/network/router/VirtualNetworkApplianceManager.java index e3597acc313..ae418d239c8 100644 --- a/server/src/com/cloud/network/router/VirtualNetworkApplianceManager.java +++ b/server/src/com/cloud/network/router/VirtualNetworkApplianceManager.java @@ -51,6 +51,7 @@ public interface VirtualNetworkApplianceManager extends Manager, VirtualNetworkA static final String RouterTemplateHyperVCK = "router.template.hyperv"; static final String RouterTemplateLxcCK = "router.template.lxc"; static final String SetServiceMonitorCK = "network.router.EnableServiceMonitoring"; + static final String RouterAlertsCheckIntervalCK = "router.alerts.check.interval"; static final ConfigKey RouterTemplateXen = new ConfigKey(String.class, RouterTemplateXenCK, "Advanced", "SystemVM Template (XenServer)", "Name of the default router template on Xenserver.", true, ConfigKey.Scope.Zone, null); @@ -66,6 +67,9 @@ public interface VirtualNetworkApplianceManager extends Manager, VirtualNetworkA static final ConfigKey SetServiceMonitor = new ConfigKey(String.class, SetServiceMonitorCK, "Advanced", "true", "service monitoring in router enable/disable option, default true", true, ConfigKey.Scope.Zone, null); + static 
final ConfigKey RouterAlertsCheckInterval = new ConfigKey(Integer.class, RouterAlertsCheckIntervalCK, "Advanced", "1800", + "Interval (in seconds) to check for alerts in Virtual Router.", false, ConfigKey.Scope.Global, null); + public static final int DEFAULT_ROUTER_VM_RAMSIZE = 128; // 128M public static final int DEFAULT_ROUTER_CPU_MHZ = 500; // 500 MHz public static final boolean USE_POD_VLAN = false; diff --git a/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java b/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java index 0899f4288c6..3cd3e807f62 100755 --- a/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java +++ b/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java @@ -17,6 +17,8 @@ package com.cloud.network.router; +import java.text.ParseException; +import java.text.SimpleDateFormat; import java.util.ArrayList; import java.util.Arrays; import java.util.Calendar; @@ -824,6 +826,13 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V s_logger.debug("router.check.interval - " + _routerCheckInterval + " so not scheduling the redundant router checking thread"); } + int _routerAlertsCheckInterval = RouterAlertsCheckInterval.value(); + if (_routerAlertsCheckInterval > 0) { + _checkExecutor.scheduleAtFixedRate(new CheckRouterAlertsTask(), _routerAlertsCheckInterval, _routerAlertsCheckInterval, TimeUnit.SECONDS); + } else { + s_logger.debug("router.alerts.check.interval - " + _routerAlertsCheckInterval + " so not scheduling the router alerts checking thread"); + } + return true; } @@ -1358,8 +1367,6 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V updateSite2SiteVpnConnectionState(routers); - getRouterAlerts(); - final List networks = _networkDao.listRedundantNetworks(); s_logger.debug("Found " + networks.size() + " networks to update RvR status. 
"); for (final NetworkVO network : networks) { @@ -1374,20 +1381,33 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V } } - private void getRouterAlerts() { - try{ - List routersInIsolatedNetwork = _routerDao.listByStateAndNetworkType(State.Running, GuestType.Isolated, mgmtSrvrId); - List routersInSharedNetwork = _routerDao.listByStateAndNetworkType(State.Running, GuestType.Shared, mgmtSrvrId); + protected class CheckRouterAlertsTask extends ManagedContextRunnable { + public CheckRouterAlertsTask() { + } + + @Override + protected void runInContext() { + try { + getRouterAlerts(); + } catch (final Exception ex) { + s_logger.error("Fail to complete the CheckRouterAlertsTask! ", ex); + } + } + } + + protected void getRouterAlerts() { + try{ + List routers = _routerDao.listByStateAndManagementServer(State.Running, mgmtSrvrId); - List routers = new ArrayList(); - routers.addAll(routersInIsolatedNetwork); - routers.addAll(routersInSharedNetwork); s_logger.debug("Found " + routers.size() + " running routers. 
"); for (final DomainRouterVO router : routers) { - if (router.getVpcId() != null) { + String serviceMonitoringFlag = SetServiceMonitor.valueIn(router.getDataCenterId()); + // Skip the routers in VPC network or skip the routers where Monitor service is not enabled in the corresponding Zone + if ( !Boolean.parseBoolean(serviceMonitoringFlag) || router.getVpcId() != null) { continue; } + String privateIP = router.getPrivateIpAddress(); if (privateIP != null) { @@ -1395,23 +1415,49 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V GetRouterAlertsCommand command = null; if (opRouterMonitorServiceVO == null) { - command = new GetRouterAlertsCommand(null); + command = new GetRouterAlertsCommand(new String("1970-01-01 00:00:00")); // To avoid sending null value } else { command = new GetRouterAlertsCommand(opRouterMonitorServiceVO.getLastAlertTimestamp()); } command.setAccessDetail(NetworkElementCommand.ROUTER_IP, router.getPrivateIpAddress()); - command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName()); - GetRouterAlertsAnswer answer = null; try { - answer = (GetRouterAlertsAnswer) _agentMgr.easySend(router.getHostId(), command); + final Answer origAnswer = _agentMgr.easySend(router.getHostId(), command); + GetRouterAlertsAnswer answer = null; + + if (origAnswer == null) { + s_logger.warn("Unable to get alerts from router " + router.getHostName()); + continue; + } + if (origAnswer instanceof GetRouterAlertsAnswer) { + answer = (GetRouterAlertsAnswer)origAnswer; + } else { + s_logger.warn("Unable to get alerts from router " + router.getHostName()); + continue; + } + if (!answer.getResult()) { + s_logger.warn("Unable to get alerts from router " + router.getHostName() + " " + answer.getDetails()); + continue; + } + String alerts[] = answer.getAlerts(); - if (alerts != null ) { + if (alerts != null) { + String lastAlertTimeStamp = answer.getTimeStamp(); + SimpleDateFormat sdfrmt = new SimpleDateFormat("yyyy-MM-dd 
HH:mm:ss"); /* HH (0-23): strict parsing with hh (1-12) rejects timestamps at hour 0 or 13-23 */ + sdfrmt.setLenient(false); + try + { + sdfrmt.parse(lastAlertTimeStamp); + } + catch (ParseException e) + { + s_logger.warn("Invalid last alert timestamp received while collecting alerts from router: " + router.getInstanceName()); + continue; + } for (String alert: alerts) { _alertMgr.sendAlert(AlertType.ALERT_TYPE_DOMAIN_ROUTER, router.getDataCenterId(), router.getPodIdToDeployIn(), "Monitoring Service on VR " + router.getInstanceName(), alert); } - String lastAlertTimeStamp = answer.getTimeStamp(); if (opRouterMonitorServiceVO == null) { opRouterMonitorServiceVO = new OpRouterMonitorServiceVO(router.getId(), router.getHostName(), lastAlertTimeStamp); _opRouterMonitorServiceDao.persist(opRouterMonitorServiceVO); @@ -1421,7 +1467,7 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V } } } catch (Exception e) { - s_logger.warn("Error while collecting alerts from router: " + router.getInstanceName() + " from host: " + router.getHostId(), e); + s_logger.warn("Error while collecting alerts from router: " + router.getInstanceName(), e); continue; } } @@ -1431,7 +1477,6 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V } } - private final static int DEFAULT_PRIORITY = 100; private final static int DEFAULT_DELTA = 2; @@ -4333,7 +4378,7 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V @Override public ConfigKey[] getConfigKeys() { - return new ConfigKey[] {UseExternalDnsServers, routerVersionCheckEnabled, SetServiceMonitor}; + return new ConfigKey[] {UseExternalDnsServers, routerVersionCheckEnabled, SetServiceMonitor, RouterAlertsCheckInterval}; } @Override diff --git a/systemvm/patches/debian/config/opt/cloud/bin/getRouterAlerts.sh b/systemvm/patches/debian/config/opt/cloud/bin/getRouterAlerts.sh index e5e8abeffda..3f5f4a3b05c 100644 --- a/systemvm/patches/debian/config/opt/cloud/bin/getRouterAlerts.sh +++ 
b/systemvm/patches/debian/config/opt/cloud/bin/getRouterAlerts.sh @@ -18,53 +18,38 @@ # getRouterAlerts.sh --- Send the alerts from routerServiceMonitor.log to Management Server -source /root/func.sh - -lock="biglock" -locked=$(getLockFile $lock) -if [ "$locked" != "1" ] -then - exit 1 -fi - #set -x filename=/var/log/routerServiceMonitor.log #Monitor service log file if [ -n "$1" -a -n "$2" ] then - reqdateval=$(date -d $1 +"%Y%m%d"); - reqtimeval=$(date -d $2 +"%H%M%S"); + reqDateVal=$(date -d "$1 $2" "+%s"); else - reqdateval=0 - reqtimeval=0 + reqDateVal=0 fi if [ -f $filename ] then while read line do - if [ -n "$line" ]; then - dateval=`echo $line |awk '{print $1}'` - timeval=`echo $line |awk '{print $2}'` - - todate=$(date -d "$dateval" +"%Y%m%d") > /dev/null - totime=$(date -d "$timeval" +"%H%M%S") > /dev/null - if [ "$todate" -gt "$reqdateval" ] > /dev/null + if [ -n "$line" ] then - if [ -n "$alerts" ]; then alerts="$alerts\n$line"; else alerts="$line"; fi #>> $outputfile - elif [ "$todate" -eq "$reqdateval" ] > /dev/null + dateval=`echo $line |awk '{print $1, $2}'` + IFS=',' read -a array <<< "$dateval" + dateval=${array[0]} + + toDateVal=$(date -d "$dateval" "+%s") + + if [ "$toDateVal" -gt "$reqDateVal" ] then - if [ "$totime" -gt "$reqtimeval" ] > /dev/null - then - if [ -n "$alerts" ]; then alerts="$alerts\n$line"; else alerts="$line"; fi #>> $outputfile - fi + alerts="$line\n$alerts" + else + break fi fi - done < $filename + done < <(tac $filename) fi if [ -n "$alerts" ]; then echo $alerts else echo "No Alerts" -fi - -unlock_exit 0 $lock $locked \ No newline at end of file +fi \ No newline at end of file diff --git a/test/integration/smoke/test_VirtualRouter_alerts.py b/test/integration/component/test_VirtualRouter_alerts.py similarity index 97% rename from test/integration/smoke/test_VirtualRouter_alerts.py rename to test/integration/component/test_VirtualRouter_alerts.py index caa9fc5b5e9..4b53e3fd2cd 100644 --- 
a/test/integration/smoke/test_VirtualRouter_alerts.py +++ b/test/integration/component/test_VirtualRouter_alerts.py @@ -156,7 +156,7 @@ class TestVRServiceFailureAlerting(cloudstackTestCase): return @attr(hypervisor="xenserver") - @attr(tags=["advanced", "basic", "provisioning"]) + @attr(tags=["advanced", "basic"]) def test_01_VRServiceFailureAlerting(self): @@ -229,7 +229,7 @@ class TestVRServiceFailureAlerting(cloudstackTestCase): res = str(result) self.debug("apache process status: %s" % res) - time.sleep(300) #wait for 5 minutes meanwhile monitor service on VR starts the apache service + time.sleep(2400) #wait for 40 minutes meanwhile monitor service on VR starts the apache service (router.alerts.check.interval default value is 30minutes) qresultset = self.dbclient.execute( "select id from alert where subject = '%s' ORDER BY id DESC LIMIT 1;" \