CLOUDSTACK-6253: Optimize the algorithm for fetching VR alerts. In addition, add a new configuration parameter, router.alerts.check.interval (default: 30 minutes), which sets how often the Virtual Router is checked for alerts.

This commit is contained in:
Harikrishna Patnala 2014-04-09 17:57:16 +05:30 committed by Murali Reddy
parent bc17f17776
commit ecc71518a9
9 changed files with 121 additions and 85 deletions

View File

@ -22,7 +22,6 @@ import com.cloud.agent.api.routing.GetRouterAlertsCommand;
public class GetRouterAlertsAnswer extends Answer {
String routerName;
String[] alerts;
String timeStamp;
@ -36,8 +35,8 @@ public class GetRouterAlertsAnswer extends Answer {
}
public GetRouterAlertsAnswer(GetRouterAlertsCommand cmd, Exception ex) {
super(cmd, ex);
public GetRouterAlertsAnswer(GetRouterAlertsCommand cmd, String details) {
super(cmd, false, details);
}
public void setAlerts(String[] alerts) {
@ -56,7 +55,4 @@ public class GetRouterAlertsAnswer extends Answer {
return timeStamp;
}
public String getRouterName() {
return routerName;
}
}

View File

@ -649,25 +649,23 @@ public class VirtualRoutingResource {
private GetRouterAlertsAnswer execute(GetRouterAlertsCommand cmd) {
String args = null;
String routerIp = cmd.getAccessDetail(NetworkElementCommand.ROUTER_IP);
if (cmd.getPreviousAlertTimeStamp() != null) {
args = cmd.getPreviousAlertTimeStamp();
}
String args = cmd.getPreviousAlertTimeStamp();
ExecutionResult result = _vrDeployer.executeInVR(routerIp, VRScripts.ROUTER_ALERTS, args);
String alerts[] = null;
String lastAlertTimestamp = null;
// CallHostPlugin results "success" when there are no alerts on virtual router
if (result.isSuccess()) {
if (!result.getDetails().isEmpty() && !result.getDetails().equals("No Alerts")) {
alerts = result.getDetails().split("\\\\n");
String[] lastAlert = alerts[alerts.length - 1].split(" ");
lastAlertTimestamp = lastAlert[0] + " " + lastAlert[1];
}
}
return new GetRouterAlertsAnswer(cmd, alerts, lastAlertTimestamp);
if (result.isSuccess()) {
if (!result.getDetails().isEmpty() && !result.getDetails().trim().equals("No Alerts")) {
alerts = result.getDetails().trim().split("\\\\n");
String[] lastAlert = alerts[alerts.length - 1].split(",");
lastAlertTimestamp = lastAlert[0];
}
return new GetRouterAlertsAnswer(cmd, alerts, lastAlertTimestamp);
} else {
return new GetRouterAlertsAnswer(cmd, result.getDetails());
}
}
protected Answer execute(CheckRouterCommand cmd) {
@ -760,21 +758,6 @@ public class VirtualRoutingResource {
return cfg;
}
protected List<ConfigItem> generateConfig(GetRouterAlertsCommand cmd) {
LinkedList<ConfigItem> cfg = new LinkedList<>();
String args = null;
String routerIp = cmd.getAccessDetail(NetworkElementCommand.ROUTER_IP);
if (cmd.getPreviousAlertTimeStamp() != null) {
args = "getRouterAlerts.sh " + routerIp + " " + cmd.getPreviousAlertTimeStamp();
} else {
args = "getRouterAlerts.sh " + routerIp;
}
cfg.add(new ConfigItem(VRScripts.ROUTER_ALERTS, args));
return cfg;
}
protected List<ConfigItem> generateConfig(SetupGuestNetworkCommand cmd) {
LinkedList<ConfigItem> cfg = new LinkedList<>();

View File

@ -108,6 +108,8 @@ public interface DomainRouterDao extends GenericDao<DomainRouterVO, Long> {
*/
List<DomainRouterVO> listByStateAndNetworkType(State state, Network.GuestType type, long mgmtSrvrId);
List<DomainRouterVO> listByStateAndManagementServer(State state, long mgmtSrvrId);
List<DomainRouterVO> findByNetworkOutsideThePod(long networkId, long podId, State state, Role role);
List<DomainRouterVO> listByNetworkAndPodAndRole(long networkId, long podId, Role role);

View File

@ -60,6 +60,7 @@ public class DomainRouterDaoImpl extends GenericDaoBase<DomainRouterVO, Long> im
protected SearchBuilder<DomainRouterVO> StateNetworkTypeSearch;
protected SearchBuilder<DomainRouterVO> OutsidePodSearch;
protected SearchBuilder<DomainRouterVO> clusterSearch;
protected SearchBuilder<DomainRouterVO> SearchByStateAndManagementServerId;
@Inject
HostDao _hostsDao;
@Inject
@ -130,6 +131,14 @@ public class DomainRouterDaoImpl extends GenericDaoBase<DomainRouterVO, Long> im
StateNetworkTypeSearch.join("host", joinHost, joinHost.entity().getId(), StateNetworkTypeSearch.entity().getHostId(), JoinType.INNER);
StateNetworkTypeSearch.done();
SearchByStateAndManagementServerId = createSearchBuilder();
SearchByStateAndManagementServerId.and("state", SearchByStateAndManagementServerId.entity().getState(), Op.EQ);
SearchBuilder<HostVO> joinHost2 = _hostsDao.createSearchBuilder();
joinHost2.and("mgmtServerId", joinHost2.entity().getManagementServerId(), Op.EQ);
SearchByStateAndManagementServerId.join("host", joinHost2, joinHost2.entity().getId(), SearchByStateAndManagementServerId.entity().getHostId(), JoinType.INNER);
SearchByStateAndManagementServerId.done();
OutsidePodSearch = createSearchBuilder();
SearchBuilder<RouterNetworkVO> joinRouterNetwork2 = _routerNetworkDao.createSearchBuilder();
joinRouterNetwork2.and("networkId", joinRouterNetwork2.entity().getNetworkId(), Op.EQ);
@ -293,6 +302,15 @@ public class DomainRouterDaoImpl extends GenericDaoBase<DomainRouterVO, Long> im
return routers;
}
/**
 * Lists the routers that are in the given state and whose joined host row
 * matches the given management server id.
 *
 * @param state      value bound to the "state" search parameter
 * @param mgmtSrvrId value bound to the "mgmtServerId" parameter of the "host" join
 * @return the routers returned by the search
 */
@Override
public List<DomainRouterVO> listByStateAndManagementServer(State state, long mgmtSrvrId) {
    final SearchCriteria<DomainRouterVO> criteria = SearchByStateAndManagementServerId.create();
    // Filter on the router's own state first, then on the joined host's management server.
    criteria.setParameters("state", state);
    criteria.setJoinParameters("host", "mgmtServerId", mgmtSrvrId);
    final List<DomainRouterVO> matchingRouters = listBy(criteria);
    return matchingRouters;
}
@Override
public List<DomainRouterVO> findByNetworkOutsideThePod(long networkId, long podId, State state, Role role) {
SearchCriteria<DomainRouterVO> sc = OutsidePodSearch.create();

View File

@ -86,6 +86,7 @@ import com.cloud.agent.api.proxy.CheckConsoleProxyLoadCommand;
import com.cloud.agent.api.proxy.WatchConsoleProxyLoadCommand;
import com.cloud.agent.api.routing.AggregationControlCommand;
import com.cloud.agent.api.routing.DhcpEntryCommand;
import com.cloud.agent.api.routing.GetRouterAlertsCommand;
import com.cloud.agent.api.routing.IpAssocCommand;
import com.cloud.agent.api.routing.IpAssocVpcCommand;
import com.cloud.agent.api.routing.LoadBalancerConfigCommand;
@ -370,6 +371,8 @@ public class SimulatorManagerImpl extends ManagerBase implements SimulatorManage
return _mockNetworkMgr.setupPVLAN((PvlanSetupCommand)cmd);
} else if (cmd instanceof StorageSubSystemCommand) {
return this.storageHandler.handleStorageCommands((StorageSubSystemCommand)cmd);
} else if (cmd instanceof GetRouterAlertsCommand) {
return new Answer(cmd);
} else if (cmd instanceof VpnUsersCfgCommand || cmd instanceof RemoteAccessVpnCfgCommand || cmd instanceof SetMonitorServiceCommand || cmd instanceof AggregationControlCommand) {
return new Answer(cmd);
} else {

View File

@ -51,6 +51,7 @@ public interface VirtualNetworkApplianceManager extends Manager, VirtualNetworkA
static final String RouterTemplateHyperVCK = "router.template.hyperv";
static final String RouterTemplateLxcCK = "router.template.lxc";
static final String SetServiceMonitorCK = "network.router.EnableServiceMonitoring";
static final String RouterAlertsCheckIntervalCK = "router.alerts.check.interval";
static final ConfigKey<String> RouterTemplateXen = new ConfigKey<String>(String.class, RouterTemplateXenCK, "Advanced", "SystemVM Template (XenServer)",
"Name of the default router template on Xenserver.", true, ConfigKey.Scope.Zone, null);
@ -66,6 +67,9 @@ public interface VirtualNetworkApplianceManager extends Manager, VirtualNetworkA
static final ConfigKey<String> SetServiceMonitor = new ConfigKey<String>(String.class, SetServiceMonitorCK, "Advanced", "true",
"service monitoring in router enable/disable option, default true", true, ConfigKey.Scope.Zone, null);
static final ConfigKey<Integer> RouterAlertsCheckInterval = new ConfigKey<Integer>(Integer.class, RouterAlertsCheckIntervalCK, "Advanced", "1800",
"Interval (in seconds) to check for alerts in Virtual Router.", false, ConfigKey.Scope.Global, null);
public static final int DEFAULT_ROUTER_VM_RAMSIZE = 128; // 128M
public static final int DEFAULT_ROUTER_CPU_MHZ = 500; // 500 MHz
public static final boolean USE_POD_VLAN = false;

View File

@ -17,6 +17,8 @@
package com.cloud.network.router;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
@ -824,6 +826,13 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
s_logger.debug("router.check.interval - " + _routerCheckInterval + " so not scheduling the redundant router checking thread");
}
int _routerAlertsCheckInterval = RouterAlertsCheckInterval.value();
if (_routerAlertsCheckInterval > 0) {
_checkExecutor.scheduleAtFixedRate(new CheckRouterAlertsTask(), _routerAlertsCheckInterval, _routerAlertsCheckInterval, TimeUnit.SECONDS);
} else {
s_logger.debug("router.alerts.check.interval - " + _routerAlertsCheckInterval + " so not scheduling the router alerts checking thread");
}
return true;
}
@ -1358,8 +1367,6 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
updateSite2SiteVpnConnectionState(routers);
getRouterAlerts();
final List<NetworkVO> networks = _networkDao.listRedundantNetworks();
s_logger.debug("Found " + networks.size() + " networks to update RvR status. ");
for (final NetworkVO network : networks) {
@ -1374,20 +1381,33 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
}
}
private void getRouterAlerts() {
try{
List<DomainRouterVO> routersInIsolatedNetwork = _routerDao.listByStateAndNetworkType(State.Running, GuestType.Isolated, mgmtSrvrId);
List<DomainRouterVO> routersInSharedNetwork = _routerDao.listByStateAndNetworkType(State.Running, GuestType.Shared, mgmtSrvrId);
protected class CheckRouterAlertsTask extends ManagedContextRunnable {
public CheckRouterAlertsTask() {
}
@Override
protected void runInContext() {
try {
getRouterAlerts();
} catch (final Exception ex) {
s_logger.error("Fail to complete the CheckRouterAlertsTask! ", ex);
}
}
}
protected void getRouterAlerts() {
try{
List<DomainRouterVO> routers = _routerDao.listByStateAndManagementServer(State.Running, mgmtSrvrId);
List<DomainRouterVO> routers = new ArrayList<DomainRouterVO>();
routers.addAll(routersInIsolatedNetwork);
routers.addAll(routersInSharedNetwork);
s_logger.debug("Found " + routers.size() + " running routers. ");
for (final DomainRouterVO router : routers) {
if (router.getVpcId() != null) {
String serviceMonitoringFlag = SetServiceMonitor.valueIn(router.getDataCenterId());
// Skip the routers in VPC network or skip the routers where Monitor service is not enabled in the corresponding Zone
if ( !Boolean.parseBoolean(serviceMonitoringFlag) || router.getVpcId() != null) {
continue;
}
String privateIP = router.getPrivateIpAddress();
if (privateIP != null) {
@ -1395,23 +1415,49 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
GetRouterAlertsCommand command = null;
if (opRouterMonitorServiceVO == null) {
command = new GetRouterAlertsCommand(null);
command = new GetRouterAlertsCommand(new String("1970-01-01 00:00:00")); // To avoid sending null value
} else {
command = new GetRouterAlertsCommand(opRouterMonitorServiceVO.getLastAlertTimestamp());
}
command.setAccessDetail(NetworkElementCommand.ROUTER_IP, router.getPrivateIpAddress());
command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName());
GetRouterAlertsAnswer answer = null;
try {
answer = (GetRouterAlertsAnswer) _agentMgr.easySend(router.getHostId(), command);
final Answer origAnswer = _agentMgr.easySend(router.getHostId(), command);
GetRouterAlertsAnswer answer = null;
if (origAnswer == null) {
s_logger.warn("Unable to get alerts from router " + router.getHostName());
continue;
}
if (origAnswer instanceof GetRouterAlertsAnswer) {
answer = (GetRouterAlertsAnswer)origAnswer;
} else {
s_logger.warn("Unable to get alerts from router " + router.getHostName());
continue;
}
if (!answer.getResult()) {
s_logger.warn("Unable to get alerts from router " + router.getHostName() + " " + answer.getDetails());
continue;
}
String alerts[] = answer.getAlerts();
if (alerts != null ) {
if (alerts != null) {
String lastAlertTimeStamp = answer.getTimeStamp();
// Validate the timestamp returned by the router before it is used any further.
// The timestamps carry no AM/PM marker (the default sent to the router is
// "1970-01-01 00:00:00"), so the hour must be parsed with the 24-hour field HH.
// With the 12-hour field hh and lenient parsing disabled, any hour from 13 to 23
// fails to parse and the router's alerts would be skipped.
SimpleDateFormat sdfrmt = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss");
sdfrmt.setLenient(false);
try
{
    sdfrmt.parse(lastAlertTimeStamp);
}
catch (ParseException e)
{
    s_logger.warn("Invalid last alert timestamp received while collecting alerts from router: " + router.getInstanceName());
    continue;
}
for (String alert: alerts) {
_alertMgr.sendAlert(AlertType.ALERT_TYPE_DOMAIN_ROUTER, router.getDataCenterId(), router.getPodIdToDeployIn(), "Monitoring Service on VR " + router.getInstanceName(), alert);
}
String lastAlertTimeStamp = answer.getTimeStamp();
if (opRouterMonitorServiceVO == null) {
opRouterMonitorServiceVO = new OpRouterMonitorServiceVO(router.getId(), router.getHostName(), lastAlertTimeStamp);
_opRouterMonitorServiceDao.persist(opRouterMonitorServiceVO);
@ -1421,7 +1467,7 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
}
}
} catch (Exception e) {
s_logger.warn("Error while collecting alerts from router: " + router.getInstanceName() + " from host: " + router.getHostId(), e);
s_logger.warn("Error while collecting alerts from router: " + router.getInstanceName(), e);
continue;
}
}
@ -1431,7 +1477,6 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
}
}
private final static int DEFAULT_PRIORITY = 100;
private final static int DEFAULT_DELTA = 2;
@ -4333,7 +4378,7 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
@Override
public ConfigKey<?>[] getConfigKeys() {
return new ConfigKey<?>[] {UseExternalDnsServers, routerVersionCheckEnabled, SetServiceMonitor};
return new ConfigKey<?>[] {UseExternalDnsServers, routerVersionCheckEnabled, SetServiceMonitor, RouterAlertsCheckInterval};
}
@Override

View File

@ -18,53 +18,38 @@
# getRouterAlerts.sh --- Send the alerts from routerServiceMonitor.log to Management Server
source /root/func.sh
lock="biglock"
locked=$(getLockFile $lock)
if [ "$locked" != "1" ]
then
exit 1
fi
#set -x
filename=/var/log/routerServiceMonitor.log #Monitor service log file
if [ -n "$1" -a -n "$2" ]
then
reqdateval=$(date -d $1 +"%Y%m%d");
reqtimeval=$(date -d $2 +"%H%M%S");
reqDateVal=$(date -d "$1 $2" "+%s");
else
reqdateval=0
reqtimeval=0
reqDateVal=0
fi
if [ -f $filename ]
then
while read line
do
if [ -n "$line" ]; then
dateval=`echo $line |awk '{print $1}'`
timeval=`echo $line |awk '{print $2}'`
todate=$(date -d "$dateval" +"%Y%m%d") > /dev/null
totime=$(date -d "$timeval" +"%H%M%S") > /dev/null
if [ "$todate" -gt "$reqdateval" ] > /dev/null
if [ -n "$line" ]
then
if [ -n "$alerts" ]; then alerts="$alerts\n$line"; else alerts="$line"; fi #>> $outputfile
elif [ "$todate" -eq "$reqdateval" ] > /dev/null
dateval=`echo $line |awk '{print $1, $2}'`
IFS=',' read -a array <<< "$dateval"
dateval=${array[0]}
toDateVal=$(date -d "$dateval" "+%s")
if [ "$toDateVal" -gt "$reqDateVal" ]
then
if [ "$totime" -gt "$reqtimeval" ] > /dev/null
then
if [ -n "$alerts" ]; then alerts="$alerts\n$line"; else alerts="$line"; fi #>> $outputfile
fi
alerts="$line\n$alerts"
else
break
fi
fi
done < $filename
done < <(tac $filename)
fi
if [ -n "$alerts" ]; then
echo $alerts
else
echo "No Alerts"
fi
unlock_exit 0 $lock $locked
fi

View File

@ -156,7 +156,7 @@ class TestVRServiceFailureAlerting(cloudstackTestCase):
return
@attr(hypervisor="xenserver")
@attr(tags=["advanced", "basic", "provisioning"])
@attr(tags=["advanced", "basic"])
def test_01_VRServiceFailureAlerting(self):
@ -229,7 +229,7 @@ class TestVRServiceFailureAlerting(cloudstackTestCase):
res = str(result)
self.debug("apache process status: %s" % res)
time.sleep(300) #wait for 5 minutes meanwhile monitor service on VR starts the apache service
time.sleep(2400) #wait for 40 minutes meanwhile monitor service on VR starts the apache service (router.alerts.check.interval default value is 30minutes)
qresultset = self.dbclient.execute(
"select id from alert where subject = '%s' ORDER BY id DESC LIMIT 1;" \