From 5e450e98527d42998a90494b082dbec57342f8e4 Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Tue, 30 Aug 2011 17:52:49 -0700 Subject: [PATCH] bug 11305: Stop disconnected redundant virtual router if we can process with the connected one When we apply rules or start a new VM, we may encounter some running routers that we can't program. That can be due to a network issue, the host being down, vCenter being disconnected, etc. To keep the routers synchronized, we stop them, but only when there is another router that we have successfully updated. If neither router can be reached, we simply give up and report it to the user. --- .../VirtualNetworkApplianceManagerImpl.java | 193 ++++++++++++++---- 1 file changed, 154 insertions(+), 39 deletions(-) diff --git a/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java b/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java index 036c79bd1d4..1d3bf250a92 100755 --- a/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java +++ b/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java @@ -1063,14 +1063,14 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian for (DomainRouterVO router : routers) { boolean skip = false; - if (router.getHostId() != null) { + State state = router.getState(); + if (router.getHostId() != null && state != State.Running) { HostVO host = _hostDao.findById(router.getHostId()); if (host == null || host.getStatus() != Status.Up) { skip = true; } } if (!skip) { - State state = router.getState(); if (state != State.Running) { router = startVirtualRouter(router, _accountService.getSystemUser(), _accountService.getSystemAccount(), params); } @@ -1661,6 +1661,8 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian String zoneName = _dcDao.findById(network.getDataCenterId()).getName(); boolean isZoneBasic = (dc.getNetworkType() == NetworkType.Basic); + List connectedRouters = new 
ArrayList(); + List disconnectedRouters = new ArrayList(); for (DomainRouterVO router : routers) { if (router.getState() != State.Running) { s_logger.warn("Unable to add virtual machine " + profile.getVirtualMachine() + " to the router " + router + " as the router is not in Running state"); @@ -1736,9 +1738,16 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian } try { _agentMgr.send(router.getHostId(), cmds); + } catch (AgentUnavailableException e){ + s_logger.warn("Unable to reach the agent " + router.getHostId(), e); + disconnectedRouters.add(router); + continue; } catch (OperationTimedoutException e) { - throw new AgentUnavailableException("Unable to reach the agent ", router.getHostId(), e); + s_logger.warn("Connection timeout on host " + router.getHostId(), e); + disconnectedRouters.add(router); + continue; } + connectedRouters.add(router); Answer answer = cmds.getAnswer("dhcp"); if (!answer.getResult()) { @@ -1770,6 +1779,20 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian rets.add(router); } + + String msg = "Unable to add new VM into network on disconnected router "; + if (!connectedRouters.isEmpty()) { + // These disconnected ones are out of sync now, stop them for synchronization + stopDisconnectedRouters(disconnectedRouters, true, msg); + } else if (!disconnectedRouters.isEmpty()) { + for (VirtualRouter router : disconnectedRouters) { + if (s_logger.isDebugEnabled()) { + s_logger.debug(msg + router.getInstanceName() + "(" + router.getId() + ")"); + } + } + throw new ResourceUnavailableException(msg, VirtualRouter.class, disconnectedRouters.get(0).getId()); + } + return rets; } @@ -1779,6 +1802,7 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian } @Override + //FIXME add partial success and STOP state support public String[] applyVpnUsers(Network network, List users, List routers) throws ResourceUnavailableException { if (routers == null || 
routers.isEmpty()) { s_logger.warn("Failed to add/remove VPN users: no router found for account and zone"); @@ -2135,6 +2159,31 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian } return true; } + + protected void stopDisconnectedRouters(List routers, boolean force, String reason) + { + if (routers.isEmpty()) { + return; + } + for (VirtualRouter router : routers) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("About to stop the router " + router.getInstanceName() + " due to: " + reason); + } + String title = "Virtual router " + router.getInstanceName() + " would be stopped, due to " + reason; + String context = "Virtual router (name: " + router.getInstanceName() + ", id: " + router.getId() + ") would be stopped, due to: " + reason; + _alertMgr.sendAlert(AlertManager.ALERT_TYPE_DOMAIN_ROUTER, + router.getDataCenterIdToDeployIn(), router.getPodIdToDeployIn(), title, context); + if (router.getIsRedundantRouter()) { + try { + stopRouter(router.getId(), force); + } catch (ConcurrentOperationException e) { + s_logger.warn("Fail to stop router " + router.getInstanceName(), e); + } catch (ResourceUnavailableException e) { + s_logger.warn("Fail to stop router " + router.getInstanceName(), e); + } + } + } + } @Override public boolean associateIP(Network network, List ipAddress, List routers) throws ResourceUnavailableException { @@ -2143,24 +2192,48 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian throw new ResourceUnavailableException("Unable to assign ip addresses", DataCenter.class, network.getDataCenterId()); } + List connectedRouters = new ArrayList(); + List disconnectedRouters = new ArrayList(); boolean result = true; + String msg = "Unable to associate ip addresses on disconnected router "; for (VirtualRouter router : routers) { if (router.getState() == State.Running) { Commands cmds = new Commands(OnError.Continue); // Have to resend all already associated ip addresses 
createAssociateIPCommands(router, ipAddress, cmds, 0); - result = result && sendCommandsToRouter(router, cmds); - } else if (router.getState() != State.Stopped) { + try{ + result = sendCommandsToRouter(router, cmds); + connectedRouters.add(router); + } catch (AgentUnavailableException e) { + s_logger.warn(msg + router.getInstanceName(), e); + disconnectedRouters.add(router); + } + + //If rules fail to apply on one domR, no need to proceed with the rest + if (!result) { + throw new ResourceUnavailableException("Unable to apply firewall rules on router ", VirtualRouter.class, router.getId()); + } + + } else if (router.getState() == State.Stopped || router.getState() == State.Stopping) { + s_logger.debug("Router " + router.getInstanceName() + " is in " + router.getState() + + ", so not sending associate ip address commands to the backend"); + } else { s_logger.warn("Unable to associate ip addresses, virtual router is not in the right state " + router.getState()); throw new ResourceUnavailableException("Unable to assign ip addresses, domR is not in right state " + router.getState(), DataCenter.class, network.getDataCenterId()); } - - //If rules fail to apply on one domR, no need to proceed with the rest - if (!result) { - throw new ResourceUnavailableException("Unable to apply firewall rules on router ", VirtualRouter.class, router.getId()); + } + + if (!connectedRouters.isEmpty()) { + // These disconnected ones are out of sync now, stop them for synchronization + stopDisconnectedRouters(disconnectedRouters, true, msg); + } else if (!disconnectedRouters.isEmpty()) { + for (VirtualRouter router : disconnectedRouters) { + if (s_logger.isDebugEnabled()) { + s_logger.debug(msg + router.getInstanceName() + "(" + router.getId() + ")"); + } } - + throw new ResourceUnavailableException(msg, VirtualRouter.class, disconnectedRouters.get(0).getId()); } return result; } @@ -2172,46 +2245,67 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian throw 
new ResourceUnavailableException("Unable to apply firewall rules", DataCenter.class, network.getDataCenterId()); } + List connectedRouters = new ArrayList(); + List disconnectedRouters = new ArrayList(); + String msg = "Unable to apply firewall rules on disconnected router "; boolean result = true; for (VirtualRouter router : routers) { if (router.getState() == State.Running) { if (rules != null && !rules.isEmpty()) { - if (rules.get(0).getPurpose() == Purpose.LoadBalancing) { - // for load balancer we have to resend all lb rules for the network - List lbs = _loadBalancerDao.listByNetworkId(network.getId()); - List lbRules = new ArrayList(); - for (LoadBalancerVO lb : lbs) { - List dstList = _lbMgr.getExistingDestinations(lb.getId()); - LoadBalancingRule loadBalancing = new LoadBalancingRule(lb, dstList); - lbRules.add(loadBalancing); + try { + if (rules.get(0).getPurpose() == Purpose.LoadBalancing) { + // for load balancer we have to resend all lb rules for the network + List lbs = _loadBalancerDao.listByNetworkId(network.getId()); + List lbRules = new ArrayList(); + for (LoadBalancerVO lb : lbs) { + List dstList = _lbMgr.getExistingDestinations(lb.getId()); + LoadBalancingRule loadBalancing = new LoadBalancingRule(lb, dstList); + lbRules.add(loadBalancing); + } + result = result && applyLBRules(router, lbRules); + } else if (rules.get(0).getPurpose() == Purpose.PortForwarding) { + result = result && applyPortForwardingRules(router, (List) rules); + } else if (rules.get(0).getPurpose() == Purpose.StaticNat) { + result = result && applyStaticNatRules(router, (List) rules); + } else if (rules.get(0).getPurpose() == Purpose.Firewall) { + result = result && applyFirewallRules(router, (List) rules); + } else { + s_logger.warn("Unable to apply rules of purpose: " + rules.get(0).getPurpose()); + result = false; } - result = result && applyLBRules(router, lbRules); - } else if (rules.get(0).getPurpose() == Purpose.PortForwarding) { - result = result && 
applyPortForwardingRules(router, (List) rules); - } else if (rules.get(0).getPurpose() == Purpose.StaticNat) { - result = result && applyStaticNatRules(router, (List) rules); - } else if (rules.get(0).getPurpose() == Purpose.Firewall) { - result = result && applyFirewallRules(router, (List) rules); - }else { - s_logger.warn("Unable to apply rules of purpose: " + rules.get(0).getPurpose()); - result = false; + connectedRouters.add(router); + } catch (AgentUnavailableException e) { + s_logger.warn(msg + router.getInstanceName(), e); + disconnectedRouters.add(router); } } - - //If rules fail to apply on one domR, no need to proceed with the rest + + //If rules fail to apply on one domR and not due to disconnection, no need to proceed with the rest if (!result) { throw new ResourceUnavailableException("Unable to apply firewall rules on router ", VirtualRouter.class, router.getId()); } - } else if (router.getState() == State.Stopped || router.getState() == State.Stopping) { - s_logger.debug("Router is in " + router.getState() + ", so not sending apply firewall rules commands to the backend"); + s_logger.debug("Router " + router.getInstanceName() + " is in " + router.getState() + + ", so not sending apply firewall rules commands to the backend"); } else { s_logger.warn("Unable to apply firewall rules, virtual router is not in the right state " + router.getState()); throw new ResourceUnavailableException("Unable to apply firewall rules, virtual router is not in the right state", VirtualRouter.class, router.getId()); } } - return result; + if (!connectedRouters.isEmpty()) { + // These disconnected ones are out of sync now, stop them for synchronization + stopDisconnectedRouters(disconnectedRouters, true, msg); + } else if (!disconnectedRouters.isEmpty()) { + for (VirtualRouter router : disconnectedRouters) { + if (s_logger.isDebugEnabled()) { + s_logger.debug(msg + router.getInstanceName() + "(" + router.getId() + ")"); + } + } + throw new ResourceUnavailableException(msg, 
VirtualRouter.class, disconnectedRouters.get(0).getId()); + } + + return true; } protected boolean applyLBRules(VirtualRouter router, List rules) throws ResourceUnavailableException { @@ -2286,26 +2380,47 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian throw new ResourceUnavailableException("Unable to create static nat", DataCenter.class, network.getDataCenterId()); } + List connectedRouters = new ArrayList(); + List disconnectedRouters = new ArrayList(); boolean result = true; + String msg = "Unable to apply static nat on disconnected router "; for (VirtualRouter router : routers) { if (router.getState() == State.Running) { s_logger.debug("Applying " + rules.size() + " static nat in network " + network); - result = applyStaticNat(router, rules); - - //If rules fail to apply on one domR, no need to proceed with the rest + try { + result = applyStaticNat(router, rules); + connectedRouters.add(router); + } catch (AgentUnavailableException e) { + s_logger.warn(msg + router.getInstanceName(), e); + disconnectedRouters.add(router); + } + + //If rules fail to apply on one domR and not due to disconnection, no need to proceed with the rest if (!result) { throw new ResourceUnavailableException("Unable to apply static nat on router ", VirtualRouter.class, router.getId()); } - + } else if (router.getState() == State.Stopped || router.getState() == State.Stopping) { - s_logger.debug("Router is in " + router.getState() + ", so not sending apply firewall rules commands to the backend"); + s_logger.debug("Router " + router.getInstanceName() + " is in " + router.getState() + ", so not sending apply static nat commands to the backend"); } else { s_logger.warn("Unable to apply static nat, virtual router is not in the right state " + router.getState()); throw new ResourceUnavailableException("Unable to apply static nat, virtual router is not in the right state", VirtualRouter.class, router.getId()); } } - return result; + if 
(!connectedRouters.isEmpty()) { + // These disconnected ones are out of sync now, stop them for synchronization + stopDisconnectedRouters(disconnectedRouters, true, msg); + } else if (!disconnectedRouters.isEmpty()) { + for (VirtualRouter router : disconnectedRouters) { + if (s_logger.isDebugEnabled()) { + s_logger.debug(msg + router.getInstanceName() + "(" + router.getId() + ")"); + } + } + throw new ResourceUnavailableException(msg, VirtualRouter.class, disconnectedRouters.get(0).getId()); + } + + return true; }