mirror of https://github.com/apache/cloudstack.git
CLOUDSTACK-6253: Optimize the algorithm for fetching VR alerts. In addition, add a new configuration parameter, router.alerts.check.interval (default: 30 minutes), which sets how often the Virtual Router is checked for alerts.
This commit is contained in:
parent
bc17f17776
commit
ecc71518a9
|
|
@ -22,7 +22,6 @@ import com.cloud.agent.api.routing.GetRouterAlertsCommand;
|
|||
|
||||
public class GetRouterAlertsAnswer extends Answer {
|
||||
|
||||
String routerName;
|
||||
String[] alerts;
|
||||
String timeStamp;
|
||||
|
||||
|
|
@ -36,8 +35,8 @@ public class GetRouterAlertsAnswer extends Answer {
|
|||
}
|
||||
|
||||
|
||||
public GetRouterAlertsAnswer(GetRouterAlertsCommand cmd, Exception ex) {
|
||||
super(cmd, ex);
|
||||
public GetRouterAlertsAnswer(GetRouterAlertsCommand cmd, String details) {
|
||||
super(cmd, false, details);
|
||||
}
|
||||
|
||||
public void setAlerts(String[] alerts) {
|
||||
|
|
@ -56,7 +55,4 @@ public class GetRouterAlertsAnswer extends Answer {
|
|||
return timeStamp;
|
||||
}
|
||||
|
||||
public String getRouterName() {
|
||||
return routerName;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -649,25 +649,23 @@ public class VirtualRoutingResource {
|
|||
|
||||
private GetRouterAlertsAnswer execute(GetRouterAlertsCommand cmd) {
|
||||
|
||||
String args = null;
|
||||
String routerIp = cmd.getAccessDetail(NetworkElementCommand.ROUTER_IP);
|
||||
if (cmd.getPreviousAlertTimeStamp() != null) {
|
||||
args = cmd.getPreviousAlertTimeStamp();
|
||||
}
|
||||
String args = cmd.getPreviousAlertTimeStamp();
|
||||
|
||||
ExecutionResult result = _vrDeployer.executeInVR(routerIp, VRScripts.ROUTER_ALERTS, args);
|
||||
String alerts[] = null;
|
||||
String lastAlertTimestamp = null;
|
||||
// CallHostPlugin results "success" when there are no alerts on virtual router
|
||||
if (result.isSuccess()) {
|
||||
if (!result.getDetails().isEmpty() && !result.getDetails().equals("No Alerts")) {
|
||||
alerts = result.getDetails().split("\\\\n");
|
||||
String[] lastAlert = alerts[alerts.length - 1].split(" ");
|
||||
lastAlertTimestamp = lastAlert[0] + " " + lastAlert[1];
|
||||
}
|
||||
}
|
||||
|
||||
return new GetRouterAlertsAnswer(cmd, alerts, lastAlertTimestamp);
|
||||
if (result.isSuccess()) {
|
||||
if (!result.getDetails().isEmpty() && !result.getDetails().trim().equals("No Alerts")) {
|
||||
alerts = result.getDetails().trim().split("\\\\n");
|
||||
String[] lastAlert = alerts[alerts.length - 1].split(",");
|
||||
lastAlertTimestamp = lastAlert[0];
|
||||
}
|
||||
return new GetRouterAlertsAnswer(cmd, alerts, lastAlertTimestamp);
|
||||
} else {
|
||||
return new GetRouterAlertsAnswer(cmd, result.getDetails());
|
||||
}
|
||||
}
|
||||
|
||||
protected Answer execute(CheckRouterCommand cmd) {
|
||||
|
|
@ -760,21 +758,6 @@ public class VirtualRoutingResource {
|
|||
return cfg;
|
||||
}
|
||||
|
||||
protected List<ConfigItem> generateConfig(GetRouterAlertsCommand cmd) {
|
||||
LinkedList<ConfigItem> cfg = new LinkedList<>();
|
||||
|
||||
String args = null;
|
||||
String routerIp = cmd.getAccessDetail(NetworkElementCommand.ROUTER_IP);
|
||||
if (cmd.getPreviousAlertTimeStamp() != null) {
|
||||
args = "getRouterAlerts.sh " + routerIp + " " + cmd.getPreviousAlertTimeStamp();
|
||||
} else {
|
||||
args = "getRouterAlerts.sh " + routerIp;
|
||||
}
|
||||
|
||||
cfg.add(new ConfigItem(VRScripts.ROUTER_ALERTS, args));
|
||||
return cfg;
|
||||
}
|
||||
|
||||
protected List<ConfigItem> generateConfig(SetupGuestNetworkCommand cmd) {
|
||||
LinkedList<ConfigItem> cfg = new LinkedList<>();
|
||||
|
||||
|
|
|
|||
|
|
@ -108,6 +108,8 @@ public interface DomainRouterDao extends GenericDao<DomainRouterVO, Long> {
|
|||
*/
|
||||
List<DomainRouterVO> listByStateAndNetworkType(State state, Network.GuestType type, long mgmtSrvrId);
|
||||
|
||||
List<DomainRouterVO> listByStateAndManagementServer(State state, long mgmtSrvrId);
|
||||
|
||||
List<DomainRouterVO> findByNetworkOutsideThePod(long networkId, long podId, State state, Role role);
|
||||
|
||||
List<DomainRouterVO> listByNetworkAndPodAndRole(long networkId, long podId, Role role);
|
||||
|
|
|
|||
|
|
@ -60,6 +60,7 @@ public class DomainRouterDaoImpl extends GenericDaoBase<DomainRouterVO, Long> im
|
|||
protected SearchBuilder<DomainRouterVO> StateNetworkTypeSearch;
|
||||
protected SearchBuilder<DomainRouterVO> OutsidePodSearch;
|
||||
protected SearchBuilder<DomainRouterVO> clusterSearch;
|
||||
protected SearchBuilder<DomainRouterVO> SearchByStateAndManagementServerId;
|
||||
@Inject
|
||||
HostDao _hostsDao;
|
||||
@Inject
|
||||
|
|
@ -130,6 +131,14 @@ public class DomainRouterDaoImpl extends GenericDaoBase<DomainRouterVO, Long> im
|
|||
StateNetworkTypeSearch.join("host", joinHost, joinHost.entity().getId(), StateNetworkTypeSearch.entity().getHostId(), JoinType.INNER);
|
||||
StateNetworkTypeSearch.done();
|
||||
|
||||
SearchByStateAndManagementServerId = createSearchBuilder();
|
||||
SearchByStateAndManagementServerId.and("state", SearchByStateAndManagementServerId.entity().getState(), Op.EQ);
|
||||
|
||||
SearchBuilder<HostVO> joinHost2 = _hostsDao.createSearchBuilder();
|
||||
joinHost2.and("mgmtServerId", joinHost2.entity().getManagementServerId(), Op.EQ);
|
||||
SearchByStateAndManagementServerId.join("host", joinHost2, joinHost2.entity().getId(), SearchByStateAndManagementServerId.entity().getHostId(), JoinType.INNER);
|
||||
SearchByStateAndManagementServerId.done();
|
||||
|
||||
OutsidePodSearch = createSearchBuilder();
|
||||
SearchBuilder<RouterNetworkVO> joinRouterNetwork2 = _routerNetworkDao.createSearchBuilder();
|
||||
joinRouterNetwork2.and("networkId", joinRouterNetwork2.entity().getNetworkId(), Op.EQ);
|
||||
|
|
@ -293,6 +302,15 @@ public class DomainRouterDaoImpl extends GenericDaoBase<DomainRouterVO, Long> im
|
|||
return routers;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DomainRouterVO> listByStateAndManagementServer(State state, long mgmtSrvrId) {
|
||||
SearchCriteria<DomainRouterVO> sc = SearchByStateAndManagementServerId.create();
|
||||
sc.setParameters("state", state);
|
||||
sc.setJoinParameters("host", "mgmtServerId", mgmtSrvrId);
|
||||
|
||||
return listBy(sc);
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<DomainRouterVO> findByNetworkOutsideThePod(long networkId, long podId, State state, Role role) {
|
||||
SearchCriteria<DomainRouterVO> sc = OutsidePodSearch.create();
|
||||
|
|
|
|||
|
|
@ -86,6 +86,7 @@ import com.cloud.agent.api.proxy.CheckConsoleProxyLoadCommand;
|
|||
import com.cloud.agent.api.proxy.WatchConsoleProxyLoadCommand;
|
||||
import com.cloud.agent.api.routing.AggregationControlCommand;
|
||||
import com.cloud.agent.api.routing.DhcpEntryCommand;
|
||||
import com.cloud.agent.api.routing.GetRouterAlertsCommand;
|
||||
import com.cloud.agent.api.routing.IpAssocCommand;
|
||||
import com.cloud.agent.api.routing.IpAssocVpcCommand;
|
||||
import com.cloud.agent.api.routing.LoadBalancerConfigCommand;
|
||||
|
|
@ -370,6 +371,8 @@ public class SimulatorManagerImpl extends ManagerBase implements SimulatorManage
|
|||
return _mockNetworkMgr.setupPVLAN((PvlanSetupCommand)cmd);
|
||||
} else if (cmd instanceof StorageSubSystemCommand) {
|
||||
return this.storageHandler.handleStorageCommands((StorageSubSystemCommand)cmd);
|
||||
} else if (cmd instanceof GetRouterAlertsCommand) {
|
||||
return new Answer(cmd);
|
||||
} else if (cmd instanceof VpnUsersCfgCommand || cmd instanceof RemoteAccessVpnCfgCommand || cmd instanceof SetMonitorServiceCommand || cmd instanceof AggregationControlCommand) {
|
||||
return new Answer(cmd);
|
||||
} else {
|
||||
|
|
|
|||
|
|
@ -51,6 +51,7 @@ public interface VirtualNetworkApplianceManager extends Manager, VirtualNetworkA
|
|||
static final String RouterTemplateHyperVCK = "router.template.hyperv";
|
||||
static final String RouterTemplateLxcCK = "router.template.lxc";
|
||||
static final String SetServiceMonitorCK = "network.router.EnableServiceMonitoring";
|
||||
static final String RouterAlertsCheckIntervalCK = "router.alerts.check.interval";
|
||||
|
||||
static final ConfigKey<String> RouterTemplateXen = new ConfigKey<String>(String.class, RouterTemplateXenCK, "Advanced", "SystemVM Template (XenServer)",
|
||||
"Name of the default router template on Xenserver.", true, ConfigKey.Scope.Zone, null);
|
||||
|
|
@ -66,6 +67,9 @@ public interface VirtualNetworkApplianceManager extends Manager, VirtualNetworkA
|
|||
static final ConfigKey<String> SetServiceMonitor = new ConfigKey<String>(String.class, SetServiceMonitorCK, "Advanced", "true",
|
||||
"service monitoring in router enable/disable option, default true", true, ConfigKey.Scope.Zone, null);
|
||||
|
||||
static final ConfigKey<Integer> RouterAlertsCheckInterval = new ConfigKey<Integer>(Integer.class, RouterAlertsCheckIntervalCK, "Advanced", "1800",
|
||||
"Interval (in seconds) to check for alerts in Virtual Router.", false, ConfigKey.Scope.Global, null);
|
||||
|
||||
public static final int DEFAULT_ROUTER_VM_RAMSIZE = 128; // 128M
|
||||
public static final int DEFAULT_ROUTER_CPU_MHZ = 500; // 500 MHz
|
||||
public static final boolean USE_POD_VLAN = false;
|
||||
|
|
|
|||
|
|
@ -17,6 +17,8 @@
|
|||
|
||||
package com.cloud.network.router;
|
||||
|
||||
import java.text.ParseException;
|
||||
import java.text.SimpleDateFormat;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Arrays;
|
||||
import java.util.Calendar;
|
||||
|
|
@ -824,6 +826,13 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
|
|||
s_logger.debug("router.check.interval - " + _routerCheckInterval + " so not scheduling the redundant router checking thread");
|
||||
}
|
||||
|
||||
int _routerAlertsCheckInterval = RouterAlertsCheckInterval.value();
|
||||
if (_routerAlertsCheckInterval > 0) {
|
||||
_checkExecutor.scheduleAtFixedRate(new CheckRouterAlertsTask(), _routerAlertsCheckInterval, _routerAlertsCheckInterval, TimeUnit.SECONDS);
|
||||
} else {
|
||||
s_logger.debug("router.alerts.check.interval - " + _routerAlertsCheckInterval + " so not scheduling the router alerts checking thread");
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
|
@ -1358,8 +1367,6 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
|
|||
|
||||
updateSite2SiteVpnConnectionState(routers);
|
||||
|
||||
getRouterAlerts();
|
||||
|
||||
final List<NetworkVO> networks = _networkDao.listRedundantNetworks();
|
||||
s_logger.debug("Found " + networks.size() + " networks to update RvR status. ");
|
||||
for (final NetworkVO network : networks) {
|
||||
|
|
@ -1374,20 +1381,33 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
|
|||
}
|
||||
}
|
||||
|
||||
private void getRouterAlerts() {
|
||||
try{
|
||||
List<DomainRouterVO> routersInIsolatedNetwork = _routerDao.listByStateAndNetworkType(State.Running, GuestType.Isolated, mgmtSrvrId);
|
||||
List<DomainRouterVO> routersInSharedNetwork = _routerDao.listByStateAndNetworkType(State.Running, GuestType.Shared, mgmtSrvrId);
|
||||
protected class CheckRouterAlertsTask extends ManagedContextRunnable {
|
||||
public CheckRouterAlertsTask() {
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void runInContext() {
|
||||
try {
|
||||
getRouterAlerts();
|
||||
} catch (final Exception ex) {
|
||||
s_logger.error("Fail to complete the CheckRouterAlertsTask! ", ex);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
protected void getRouterAlerts() {
|
||||
try{
|
||||
List<DomainRouterVO> routers = _routerDao.listByStateAndManagementServer(State.Running, mgmtSrvrId);
|
||||
|
||||
List<DomainRouterVO> routers = new ArrayList<DomainRouterVO>();
|
||||
routers.addAll(routersInIsolatedNetwork);
|
||||
routers.addAll(routersInSharedNetwork);
|
||||
s_logger.debug("Found " + routers.size() + " running routers. ");
|
||||
|
||||
for (final DomainRouterVO router : routers) {
|
||||
if (router.getVpcId() != null) {
|
||||
String serviceMonitoringFlag = SetServiceMonitor.valueIn(router.getDataCenterId());
|
||||
// Skip the routers in VPC network or skip the routers where Monitor service is not enabled in the corresponding Zone
|
||||
if ( !Boolean.parseBoolean(serviceMonitoringFlag) || router.getVpcId() != null) {
|
||||
continue;
|
||||
}
|
||||
|
||||
String privateIP = router.getPrivateIpAddress();
|
||||
|
||||
if (privateIP != null) {
|
||||
|
|
@ -1395,23 +1415,49 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
|
|||
|
||||
GetRouterAlertsCommand command = null;
|
||||
if (opRouterMonitorServiceVO == null) {
|
||||
command = new GetRouterAlertsCommand(null);
|
||||
command = new GetRouterAlertsCommand(new String("1970-01-01 00:00:00")); // To avoid sending null value
|
||||
} else {
|
||||
command = new GetRouterAlertsCommand(opRouterMonitorServiceVO.getLastAlertTimestamp());
|
||||
}
|
||||
|
||||
command.setAccessDetail(NetworkElementCommand.ROUTER_IP, router.getPrivateIpAddress());
|
||||
command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName());
|
||||
|
||||
GetRouterAlertsAnswer answer = null;
|
||||
try {
|
||||
answer = (GetRouterAlertsAnswer) _agentMgr.easySend(router.getHostId(), command);
|
||||
final Answer origAnswer = _agentMgr.easySend(router.getHostId(), command);
|
||||
GetRouterAlertsAnswer answer = null;
|
||||
|
||||
if (origAnswer == null) {
|
||||
s_logger.warn("Unable to get alerts from router " + router.getHostName());
|
||||
continue;
|
||||
}
|
||||
if (origAnswer instanceof GetRouterAlertsAnswer) {
|
||||
answer = (GetRouterAlertsAnswer)origAnswer;
|
||||
} else {
|
||||
s_logger.warn("Unable to get alerts from router " + router.getHostName());
|
||||
continue;
|
||||
}
|
||||
if (!answer.getResult()) {
|
||||
s_logger.warn("Unable to get alerts from router " + router.getHostName() + " " + answer.getDetails());
|
||||
continue;
|
||||
}
|
||||
|
||||
String alerts[] = answer.getAlerts();
|
||||
if (alerts != null ) {
|
||||
if (alerts != null) {
|
||||
String lastAlertTimeStamp = answer.getTimeStamp();
|
||||
SimpleDateFormat sdfrmt = new SimpleDateFormat("yyyy-MM-dd hh:mm:ss");
|
||||
sdfrmt.setLenient(false);
|
||||
try
|
||||
{
|
||||
sdfrmt.parse(lastAlertTimeStamp);
|
||||
}
|
||||
catch (ParseException e)
|
||||
{
|
||||
s_logger.warn("Invalid last alert timestamp received while collecting alerts from router: " + router.getInstanceName());
|
||||
continue;
|
||||
}
|
||||
for (String alert: alerts) {
|
||||
_alertMgr.sendAlert(AlertType.ALERT_TYPE_DOMAIN_ROUTER, router.getDataCenterId(), router.getPodIdToDeployIn(), "Monitoring Service on VR " + router.getInstanceName(), alert);
|
||||
}
|
||||
String lastAlertTimeStamp = answer.getTimeStamp();
|
||||
if (opRouterMonitorServiceVO == null) {
|
||||
opRouterMonitorServiceVO = new OpRouterMonitorServiceVO(router.getId(), router.getHostName(), lastAlertTimeStamp);
|
||||
_opRouterMonitorServiceDao.persist(opRouterMonitorServiceVO);
|
||||
|
|
@ -1421,7 +1467,7 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
|
|||
}
|
||||
}
|
||||
} catch (Exception e) {
|
||||
s_logger.warn("Error while collecting alerts from router: " + router.getInstanceName() + " from host: " + router.getHostId(), e);
|
||||
s_logger.warn("Error while collecting alerts from router: " + router.getInstanceName(), e);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
|
|
@ -1431,7 +1477,6 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
|
|||
}
|
||||
}
|
||||
|
||||
|
||||
private final static int DEFAULT_PRIORITY = 100;
|
||||
private final static int DEFAULT_DELTA = 2;
|
||||
|
||||
|
|
@ -4333,7 +4378,7 @@ public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements V
|
|||
|
||||
@Override
|
||||
public ConfigKey<?>[] getConfigKeys() {
|
||||
return new ConfigKey<?>[] {UseExternalDnsServers, routerVersionCheckEnabled, SetServiceMonitor};
|
||||
return new ConfigKey<?>[] {UseExternalDnsServers, routerVersionCheckEnabled, SetServiceMonitor, RouterAlertsCheckInterval};
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -18,53 +18,38 @@
|
|||
|
||||
# getRouterAlerts.sh --- Send the alerts from routerServiceMonitor.log to Management Server
|
||||
|
||||
source /root/func.sh
|
||||
|
||||
lock="biglock"
|
||||
locked=$(getLockFile $lock)
|
||||
if [ "$locked" != "1" ]
|
||||
then
|
||||
exit 1
|
||||
fi
|
||||
|
||||
#set -x
|
||||
|
||||
filename=/var/log/routerServiceMonitor.log #Monitor service log file
|
||||
if [ -n "$1" -a -n "$2" ]
|
||||
then
|
||||
reqdateval=$(date -d $1 +"%Y%m%d");
|
||||
reqtimeval=$(date -d $2 +"%H%M%S");
|
||||
reqDateVal=$(date -d "$1 $2" "+%s");
|
||||
else
|
||||
reqdateval=0
|
||||
reqtimeval=0
|
||||
reqDateVal=0
|
||||
fi
|
||||
if [ -f $filename ]
|
||||
then
|
||||
while read line
|
||||
do
|
||||
if [ -n "$line" ]; then
|
||||
dateval=`echo $line |awk '{print $1}'`
|
||||
timeval=`echo $line |awk '{print $2}'`
|
||||
|
||||
todate=$(date -d "$dateval" +"%Y%m%d") > /dev/null
|
||||
totime=$(date -d "$timeval" +"%H%M%S") > /dev/null
|
||||
if [ "$todate" -gt "$reqdateval" ] > /dev/null
|
||||
if [ -n "$line" ]
|
||||
then
|
||||
if [ -n "$alerts" ]; then alerts="$alerts\n$line"; else alerts="$line"; fi #>> $outputfile
|
||||
elif [ "$todate" -eq "$reqdateval" ] > /dev/null
|
||||
dateval=`echo $line |awk '{print $1, $2}'`
|
||||
IFS=',' read -a array <<< "$dateval"
|
||||
dateval=${array[0]}
|
||||
|
||||
toDateVal=$(date -d "$dateval" "+%s")
|
||||
|
||||
if [ "$toDateVal" -gt "$reqDateVal" ]
|
||||
then
|
||||
if [ "$totime" -gt "$reqtimeval" ] > /dev/null
|
||||
then
|
||||
if [ -n "$alerts" ]; then alerts="$alerts\n$line"; else alerts="$line"; fi #>> $outputfile
|
||||
fi
|
||||
alerts="$line\n$alerts"
|
||||
else
|
||||
break
|
||||
fi
|
||||
fi
|
||||
done < $filename
|
||||
done < <(tac $filename)
|
||||
fi
|
||||
if [ -n "$alerts" ]; then
|
||||
echo $alerts
|
||||
else
|
||||
echo "No Alerts"
|
||||
fi
|
||||
|
||||
unlock_exit 0 $lock $locked
|
||||
fi
|
||||
|
|
@ -156,7 +156,7 @@ class TestVRServiceFailureAlerting(cloudstackTestCase):
|
|||
return
|
||||
|
||||
@attr(hypervisor="xenserver")
|
||||
@attr(tags=["advanced", "basic", "provisioning"])
|
||||
@attr(tags=["advanced", "basic"])
|
||||
def test_01_VRServiceFailureAlerting(self):
|
||||
|
||||
|
||||
|
|
@ -229,7 +229,7 @@ class TestVRServiceFailureAlerting(cloudstackTestCase):
|
|||
res = str(result)
|
||||
self.debug("apache process status: %s" % res)
|
||||
|
||||
time.sleep(300) #wait for 5 minutes meanwhile monitor service on VR starts the apache service
|
||||
time.sleep(2400) #wait for 40 minutes meanwhile monitor service on VR starts the apache service (router.alerts.check.interval default value is 30minutes)
|
||||
|
||||
qresultset = self.dbclient.execute(
|
||||
"select id from alert where subject = '%s' ORDER BY id DESC LIMIT 1;" \
|
||||
Loading…
Reference in New Issue