From 4bc86865133874f6c9eb730e9c668cf33b909f2d Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Mon, 27 Jun 2011 16:24:55 -0700 Subject: [PATCH] bug 10429: Backport redundant virtual router Part 1 This backport contained: commit 52317c718c25111c2535657139b541db0c9d1e1f bug 9154: Initial check in for enabling redundant virtual router commit 54199112055d754371bfb141168fb5538bf6d6ea Add host verification for CheckRouterCommand commit cef978a228c90056ead9be10cbc4de74c2b8de76 Fix CheckRouterAnswer's isMaster report commit 4072f0a6991ac3b63601a1764fbe14188965f62f Some build fixes and code refactoring for redundant router commit 4d3350b7cd8ee2706a9bace4437fc194e36c8dd5 Redundant Router: Fix OVS commit 6a228830e7c46d819fa0c3317e159e041337e887 Fix findByNetwork()/findByNetworkAndPod()'s return commit c627777b3d5bdbcd60db4032cebd349a5b1ecd83 Redundant Router: Fix isVmAlive() commit e1275d2514adc41f8744f5107d4069c38be195f1 Only issue CheckRouterCommand to redundant routers And all modification to the scripts till commit 4e3942462ed3fde3a3d7011e95839e2128fba514 logging changes in the master branch. 
--- .../cloud/agent/api/CheckRouterAnswer.java | 44 ++ .../cloud/agent/api/CheckRouterCommand.java | 31 ++ api/src/com/cloud/api/ApiConstants.java | 2 +- .../VirtualRoutingResource.java | 32 ++ core/src/com/cloud/vm/DomainRouterVO.java | 40 ++ patches/systemvm/debian/buildsystemvm.sh | 2 + .../config/etc/init.d/cloud-early-config | 72 ++- .../debian/config/etc/init.d/postinit | 21 +- .../config/etc/iptables/iptables-router | 2 + .../config/opt/cloud/bin/patchsystemvm.sh | 12 +- .../debian/config/root/checkrouter.sh | 3 + .../systemvm/debian/config/root/ipassoc.sh | 40 +- .../config/root/redundant_router/backup.sh | 32 ++ .../redundant_router/conntrackd.conf.templ | 401 +++++++++++++++ .../root/redundant_router/disable_pubip.sh | 4 + .../redundant_router/enable_pubip.sh.templ | 7 + .../config/root/redundant_router/fault.sh | 6 + .../redundant_router/keepalived.conf.templ | 56 +++ .../config/root/redundant_router/master.sh | 32 ++ .../root/redundant_router/primary-backup.sh | 128 +++++ scripts/network/domr/getRouterStatus.sh | 39 ++ .../src/com/cloud/configuration/Config.java | 1 + .../com/cloud/ha/UserVmDomRInvestigator.java | 13 +- .../com/cloud/network/NetworkManagerImpl.java | 2 +- .../cloud/network/element/DhcpElement.java | 51 +- .../network/element/VirtualRouterElement.java | 41 +- .../cloud/network/guru/GuestNetworkGuru.java | 25 - .../network/ovs/OvsNetworkManagerImpl.java | 42 +- .../network/ovs/OvsTunnelManagerImpl.java | 14 +- .../VirtualNetworkApplianceManager.java | 12 +- .../VirtualNetworkApplianceManagerImpl.java | 473 +++++++++++------- .../cloud/vm/VirtualMachineManagerImpl.java | 9 + .../src/com/cloud/vm/dao/DomainRouterDao.java | 8 +- .../com/cloud/vm/dao/DomainRouterDaoImpl.java | 16 +- setup/db/create-schema.sql | 3 + 35 files changed, 1405 insertions(+), 311 deletions(-) create mode 100644 api/src/com/cloud/agent/api/CheckRouterAnswer.java create mode 100644 api/src/com/cloud/agent/api/CheckRouterCommand.java create mode 100755 
patches/systemvm/debian/config/root/checkrouter.sh create mode 100644 patches/systemvm/debian/config/root/redundant_router/backup.sh create mode 100644 patches/systemvm/debian/config/root/redundant_router/conntrackd.conf.templ create mode 100644 patches/systemvm/debian/config/root/redundant_router/disable_pubip.sh create mode 100644 patches/systemvm/debian/config/root/redundant_router/enable_pubip.sh.templ create mode 100644 patches/systemvm/debian/config/root/redundant_router/fault.sh create mode 100644 patches/systemvm/debian/config/root/redundant_router/keepalived.conf.templ create mode 100644 patches/systemvm/debian/config/root/redundant_router/master.sh create mode 100644 patches/systemvm/debian/config/root/redundant_router/primary-backup.sh create mode 100644 scripts/network/domr/getRouterStatus.sh diff --git a/api/src/com/cloud/agent/api/CheckRouterAnswer.java b/api/src/com/cloud/agent/api/CheckRouterAnswer.java new file mode 100644 index 00000000000..814b5cb124e --- /dev/null +++ b/api/src/com/cloud/agent/api/CheckRouterAnswer.java @@ -0,0 +1,44 @@ +/** + * Copyright (C) 2010 Cloud.com, Inc. All rights reserved. + * + * This software is licensed under the GNU General Public License v3 or later. + * + * It is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ +package com.cloud.agent.api; + +public class CheckRouterAnswer extends Answer { + public static final String ROUTER_NAME = "router.name"; + public static final String ROUTER_IP = "router.ip"; + boolean isMaster; + + protected CheckRouterAnswer() { + } + + public CheckRouterAnswer(CheckRouterCommand cmd, boolean isMaster, String details) { + super(cmd, true, details); + this.isMaster = isMaster; + } + + public CheckRouterAnswer(CheckRouterCommand cmd, String details) { + super(cmd, false, details); + } + + public boolean getIsMaster() { + return isMaster; + } + + public void setIsMaster(boolean isMaster) { + this.isMaster = isMaster; + } +} diff --git a/api/src/com/cloud/agent/api/CheckRouterCommand.java b/api/src/com/cloud/agent/api/CheckRouterCommand.java new file mode 100644 index 00000000000..87325920b8c --- /dev/null +++ b/api/src/com/cloud/agent/api/CheckRouterCommand.java @@ -0,0 +1,31 @@ +/** + * Copyright (C) 2010 Cloud.com, Inc. All rights reserved. + * + * This software is licensed under the GNU General Public License v3 or later. + * + * It is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see <http://www.gnu.org/licenses/>. 
+ * + */ +package com.cloud.agent.api; + +import com.cloud.agent.api.routing.NetworkElementCommand; + +public class CheckRouterCommand extends NetworkElementCommand { + @Override + public boolean executeInSequence() { + return false; + } + + public CheckRouterCommand() { + super(); + } +} diff --git a/api/src/com/cloud/api/ApiConstants.java b/api/src/com/cloud/api/ApiConstants.java index 27347bfcd02..3a43d0a9afe 100755 --- a/api/src/com/cloud/api/ApiConstants.java +++ b/api/src/com/cloud/api/ApiConstants.java @@ -245,6 +245,6 @@ public class ApiConstants { public static final String VOLUME_NAME = "volumename"; public static final String SNAPSHOT_POLICY = "snapshotpolicy"; public static final String SNAPSHOT_RESERVATION = "snapshotreservation"; - public static final String REDUNDANT_ROUTER = "redundantrouter"; public static final String IP_NETWORK_LIST = "iptonetworklist"; + public static final String REDUNDANT_ROUTER = "redundantrouter"; } diff --git a/core/src/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java b/core/src/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java index c12a35b4948..810fe79fbe2 100755 --- a/core/src/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java +++ b/core/src/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java @@ -40,6 +40,8 @@ import org.apache.commons.codec.binary.Base64; import org.apache.log4j.Logger; import com.cloud.agent.api.Answer; +import com.cloud.agent.api.CheckRouterAnswer; +import com.cloud.agent.api.CheckRouterCommand; import com.cloud.agent.api.Command; import com.cloud.agent.api.proxy.CheckConsoleProxyLoadCommand; import com.cloud.agent.api.proxy.ConsoleProxyLoadAnswer; @@ -86,6 +88,7 @@ public class VirtualRoutingResource implements Manager { private String _vmDataPath; private String _publicEthIf; private String _privateEthIf; + private String _getRouterStatusPath; private int _timeout; @@ -116,6 +119,8 @@ public class VirtualRoutingResource implements 
Manager { return execute((DhcpEntryCommand)cmd); } else if (cmd instanceof VmDataCommand) { return execute ((VmDataCommand)cmd); + } else if (cmd instanceof CheckRouterCommand) { + return execute ((CheckRouterCommand)cmd); } else { return Answer.createUnsupportedCommandAnswer(cmd); } @@ -346,6 +351,28 @@ public class VirtualRoutingResource implements Manager { return new Answer(cmd, result==null, result); } + public String getRouterStatus(String routerIP) { + final Script command = new Script(_getRouterStatusPath, _timeout, s_logger); + final OutputInterpreter.OneLineParser parser = new OutputInterpreter.OneLineParser(); + command.add(routerIP); + String result = command.execute(parser); + if (result == null) { + return parser.getLine(); + } + return null; + } + + protected Answer execute(CheckRouterCommand cmd) { + // Runs the router status script; a missing status line is reported as a failed check, not as "not master". + final String routerPrivateIPAddress = cmd.getAccessDetail(NetworkElementCommand.ROUTER_IP); + final String result = getRouterStatus(routerPrivateIPAddress); + if (result == null) { + return new CheckRouterAnswer(cmd, "Unable to get router status from " + routerPrivateIPAddress); + } + // The router is master only when the status line is exactly "Status: MASTER". + return new CheckRouterAnswer(cmd, result.equals("Status: MASTER"), "Router return: " + result); + } + protected Answer execute(final CheckConsoleProxyLoadCommand cmd) { return executeProxyLoadScan(cmd, cmd.getProxyVmId(), cmd.getProxyVmName(), cmd.getProxyManagementIp(), cmd.getProxyCmdPort()); } @@ -651,6 +678,11 @@ public class VirtualRoutingResource implements Manager { throw new ConfigurationException("Unable to find user_data.sh"); } + _getRouterStatusPath = findScript("getRouterStatus.sh"); + if(_getRouterStatusPath == null) { + throw new ConfigurationException("Unable to find getRouterStatus.sh"); + } + _publicEthIf = (String)params.get("public.network.device"); if (_publicEthIf == null) { _publicEthIf = "xenbr1"; diff --git a/core/src/com/cloud/vm/DomainRouterVO.java b/core/src/com/cloud/vm/DomainRouterVO.java index 02ab3a5c18f..5a42b6354f0 100755 --- a/core/src/com/cloud/vm/DomainRouterVO.java 
+++ b/core/src/com/cloud/vm/DomainRouterVO.java @@ -50,6 +50,15 @@ public class DomainRouterVO extends VMInstanceVO implements VirtualRouter { @Column(name="network_id") long networkId; + + @Column(name="is_redundant_router") + boolean isRedundantRouter; + + @Column(name="priority") + int priority; + + @Column(name="is_master") + boolean isMaster; @Column(name="role") @Enumerated(EnumType.STRING) @@ -64,9 +73,15 @@ public class DomainRouterVO extends VMInstanceVO implements VirtualRouter { long domainId, long accountId, long networkId, + boolean isRedundantRouter, + int priority, + boolean isMaster, boolean haEnabled) { super(id, serviceOfferingId, name, name, Type.DomainRouter, templateId, hypervisorType, guestOSId, domainId, accountId, haEnabled); this.networkId = networkId; + this.isRedundantRouter = isRedundantRouter; + this.priority = priority; + this.isMaster = isMaster; } public void setPublicIpAddress(String publicIpAddress) { @@ -123,10 +138,35 @@ public class DomainRouterVO extends VMInstanceVO implements VirtualRouter { this.role = role; } + public boolean getIsRedundantRouter() { + return this.isRedundantRouter; + } + + public void setIsRedundantRouter(boolean isRedundantRouter) { + this.isRedundantRouter = isRedundantRouter; + } + @Override public long getServiceOfferingId() { return serviceOfferingId; } + + public int getPriority() { + return this.priority; + } + + public void setPriority(int priority) { + this.priority = priority; + } + + public boolean getIsMaster() { + return this.isMaster; + } + + public void setIsMaster(boolean isMaster) { + this.isMaster = isMaster; + } + public void setServiceOfferingId(long serviceOfferingId) { this.serviceOfferingId = serviceOfferingId; } diff --git a/patches/systemvm/debian/buildsystemvm.sh b/patches/systemvm/debian/buildsystemvm.sh index 0c8cd5ab51f..d45cc4b9050 100755 --- a/patches/systemvm/debian/buildsystemvm.sh +++ b/patches/systemvm/debian/buildsystemvm.sh @@ -372,6 +372,8 @@ packages() { chroot . 
apt-get --no-install-recommends -q -y --force-yes install open-vm-tools #xenstore utils chroot . apt-get --no-install-recommends -q -y --force-yes install xenstore-utils libxenstore3.0 + #keepalived and conntrackd + chroot . apt-get --no-install-recommends -q -y --force-yes install keepalived conntrackd ipvsadm libnetfilter-conntrack3 libnl1 echo "***** getting sun jre 6*********" chroot . echo 'sun-java6-bin shared/accepted-sun-dlj-v1-1 boolean true diff --git a/patches/systemvm/debian/config/etc/init.d/cloud-early-config b/patches/systemvm/debian/config/etc/init.d/cloud-early-config index 80704f848cc..fb607d34d72 100755 --- a/patches/systemvm/debian/config/etc/init.d/cloud-early-config +++ b/patches/systemvm/debian/config/etc/init.d/cloud-early-config @@ -194,7 +194,7 @@ setup_common() { init_interfaces $1 $2 $3 setup_interface "0" $ETH0_IP $ETH0_MASK $GW setup_interface "1" $ETH1_IP $ETH1_MASK $GW - if [ -n "$ETH2_IP" ] + if [ -n "$ETH2_IP" -a "$RROUTER" != "1" ] then setup_interface "2" $ETH2_IP $ETH2_MASK $GW fi @@ -231,12 +231,15 @@ setup_common() { ip route add $MGMTNET via $LOCAL_GW dev eth1 fi - ip route delete default - if [ -n "$3" ] - then - ip route add default via $GW dev $3 - else - ip route add default via $GW dev eth0 + ip route delete default + if [ "$RROUTER" != "1" ] + then + if [ -z "$3" ] + then + ip route add default via $GW dev eth0 + else + ip route add default via $GW dev $3 + fi fi } @@ -256,6 +259,13 @@ setup_dnsmasq() { sed -i -e "s/^dhcp-range=.*$/dhcp-range=$DHCP_RANGE,static/" /etc/dnsmasq.conf sed -i -e "s/^[#]*listen-address=.*$/listen-address=$ETH0_IP/" /etc/dnsmasq.conf + if [ "$RROUTER" == "1" ] + then + sed -i -e "/^[#]*dhcp-option=option:router.*$/d" /etc/dnsmasq.conf + echo "dhcp-option=option:router,$GUEST_GW" >> /etc/dnsmasq.conf + sed -i -e "/^[#]*dhcp-option=6.*$/d" /etc/dnsmasq.conf + echo "dhcp-option=6,$GUEST_GW" >> /etc/dnsmasq.conf + fi } setup_sshd(){ @@ -273,11 +283,42 @@ setup_apache2() { [ -f 
/etc/apache2/ports.conf ] && sed -i -e "s/NameVirtualHost .*:80/NameVirtualHost $ip:80/g" /etc/apache2/ports.conf } +setup_redundant_router() { + rm /tmp/rrouter.lock + ip route delete default + cp /root/redundant_router/keepalived.conf.templ /etc/keepalived/keepalived.conf + cp /root/redundant_router/conntrackd.conf.templ /etc/conntrackd/conntrackd.conf + cp /root/redundant_router/enable_pubip.sh.templ /root/redundant_router/enable_pubip.sh + sed -i "s/\[ROUTER_ID\]/$NAME/g" /etc/keepalived/keepalived.conf + sed -i "s/\[ROUTER_IP\]/$GUEST_GW\/$GUEST_CIDR_SIZE/g" /etc/keepalived/keepalived.conf + sed -i "s/\[BOARDCAST\]/$GUEST_BRD/g" /etc/keepalived/keepalived.conf + sed -i "s/\[PRIORITY\]/$ROUTER_PR/g" /etc/keepalived/keepalived.conf + sed -i "s/\[LINK_IF\]/eth0/g" /etc/conntrackd/conntrackd.conf + sed -i "s/\[LINK_IP\]/$ETH0_IP/g" /etc/conntrackd/conntrackd.conf + sed -i "s/\[IGNORE_IP1\]/$GUEST_GW/g" /etc/conntrackd/conntrackd.conf + sed -i "s/\[IGNORE_IP2\]/$ETH0_IP/g" /etc/conntrackd/conntrackd.conf + sed -i "s/\[IGNORE_IP3\]/$ETH1_IP/g" /etc/conntrackd/conntrackd.conf + sed -i "s/\[ETH2IP\]/$ETH2_IP/g" /root/redundant_router/enable_pubip.sh + sed -i "s/\[GATEWAY\]/$GW/g" /root/redundant_router/enable_pubip.sh + sed -i "s/--exec\ \$DAEMON;/--exec\ \$DAEMON\ --\ --vrrp;/g" /etc/init.d/keepalived + grep "sleep 10;" /etc/init.d/keepalived > /dev/null + if [ $? 
-ne 0 ] + then + sed -i "s/if\ start-stop-daemon\ --start/sleep\ 10;if\ start-stop-daemon\ --start/g" /etc/init.d/keepalived + fi +} + setup_router() { log_it "Setting up virtual router system vm" if [ -n "$ETH2_IP" ] then - setup_common eth0 eth1 eth2 + if [ "$RROUTER" == "1" ] + then + setup_common eth0 eth1 + setup_redundant_router + else + setup_common eth0 eth1 eth2 + fi else setup_common eth0 eth1 fi @@ -517,6 +558,21 @@ for i in $CMDLINE defaultroute) DEFAULTROUTE=$VALUE ;; + redundant_router) + RROUTER=$VALUE + ;; + guestgw) + GUEST_GW=$VALUE + ;; + guestbrd) + GUEST_BRD=$VALUE + ;; + guestcidrsize) + GUEST_CIDR_SIZE=$VALUE + ;; + router_pr) + ROUTER_PR=$VALUE + ;; esac done } diff --git a/patches/systemvm/debian/config/etc/init.d/postinit b/patches/systemvm/debian/config/etc/init.d/postinit index f9502408978..b68c708229e 100755 --- a/patches/systemvm/debian/config/etc/init.d/postinit +++ b/patches/systemvm/debian/config/etc/init.d/postinit @@ -33,6 +33,15 @@ setup_console_proxy() { echo "$public_ip $NAME" >> /etc/hosts } +setup_redundant_router() { + if [ "$RROUTER" != "1" ] + then + return 1 + fi + eth2mac=`ip link show eth2 | awk '/ether/ {print $2}'` + sed -i "s/\[ETH2MAC\]/$eth2mac/g" /root/redundant_router/enable_pubip.sh +} + start() { case $TYPE in secstorage) @@ -43,6 +52,11 @@ start() { [ "$NAME" == "" ] && NAME=consoleproxy setup_console_proxy; ;; + router) + [ "$NAME" == "" ] && NAME=router + setup_redundant_router; + ;; + esac } @@ -54,7 +68,7 @@ status() { echo "" } -CMDLINE=$(cat /proc/cmdline) +CMDLINE=$(cat /var/cache/cloud/cmdline) TYPE="router" BOOTPROTO="static" @@ -115,10 +129,13 @@ for i in $CMDLINE type) TYPE=$VALUE ;; + redundant_router) + RROUTER=$VALUE + ;; esac done -if [ "$BOOTPROTO" == "static" ] +if [ "$BOOTPROTO" == "static" -a "$RROUTER" != "1" ] then exit 0 fi diff --git a/patches/systemvm/debian/config/etc/iptables/iptables-router b/patches/systemvm/debian/config/etc/iptables/iptables-router index 3bc7b50f74a..9b56209159e 
100644 --- a/patches/systemvm/debian/config/etc/iptables/iptables-router +++ b/patches/systemvm/debian/config/etc/iptables/iptables-router @@ -7,6 +7,8 @@ COMMIT :INPUT DROP [0:0] :FORWARD DROP [0:0] :OUTPUT ACCEPT [0:0] +-A INPUT -d 224.0.0.18/32 -j ACCEPT +-A INPUT -d 225.0.0.50/32 -j ACCEPT -A INPUT -i eth0 -m state --state RELATED,ESTABLISHED -j ACCEPT -A INPUT -i eth1 -m state --state RELATED,ESTABLISHED -j ACCEPT -A INPUT -i eth2 -m state --state RELATED,ESTABLISHED -j ACCEPT diff --git a/patches/systemvm/debian/config/opt/cloud/bin/patchsystemvm.sh b/patches/systemvm/debian/config/opt/cloud/bin/patchsystemvm.sh index e9bcc5ebe2f..b5710db7622 100755 --- a/patches/systemvm/debian/config/opt/cloud/bin/patchsystemvm.sh +++ b/patches/systemvm/debian/config/opt/cloud/bin/patchsystemvm.sh @@ -67,6 +67,8 @@ secstorage_svcs() { } routing_svcs() { + grep "redundant_router" /var/cache/cloud/cmdline > /dev/null + RROUTER=$? chkconfig cloud off chkconfig cloud-passwd-srvr on ; chkconfig haproxy on ; @@ -74,7 +76,15 @@ routing_svcs() { chkconfig ssh on chkconfig nfs-common off chkconfig portmap off - echo "cloud-passwd-srvr ssh dnsmasq haproxy apache2" > /var/cache/cloud/enabled_svcs + if [ $RROUTER -eq 0 ] + then + chkconfig postinit on + echo "postinit" > /var/cache/cloud/enabled_svcs + else + chkconfig keepalived off + chkconfig conntrackd off + fi + echo "cloud-passwd-srvr ssh dnsmasq haproxy apache2" >> /var/cache/cloud/enabled_svcs echo "cloud nfs-common portmap" > /var/cache/cloud/disabled_svcs } diff --git a/patches/systemvm/debian/config/root/checkrouter.sh b/patches/systemvm/debian/config/root/checkrouter.sh new file mode 100755 index 00000000000..9b297663ecd --- /dev/null +++ b/patches/systemvm/debian/config/root/checkrouter.sh @@ -0,0 +1,3 @@ +#!/bin/bash + +tail -n 1 /root/keepalived.log | grep "Status" diff --git a/patches/systemvm/debian/config/root/ipassoc.sh b/patches/systemvm/debian/config/root/ipassoc.sh index f1ec840314e..949b8ac4671 100644 --- 
a/patches/systemvm/debian/config/root/ipassoc.sh +++ b/patches/systemvm/debian/config/root/ipassoc.sh @@ -30,12 +30,12 @@ usage() { printf " %s -D -l -c [-f] \n" $(basename $0) >&2 } - add_nat_entry() { local pubIp=$1 logger -t cloud "$(basename $0):Adding nat entry for ip $pubIp on interface $ethDev" local ipNoMask=$(echo $1 | awk -F'/' '{print $1}') - sudo ip link set $ethDev up + sudo ip link show $ethDev | grep "state DOWN" > /dev/null + local old_state=$? sudo ip addr add dev $ethDev $pubIp sudo iptables -D FORWARD -i $ethDev -o eth0 -m state --state RELATED,ESTABLISHED -j ACCEPT sudo iptables -D FORWARD -i eth0 -o $ethDev -j ACCEPT @@ -43,16 +43,19 @@ add_nat_entry() { sudo iptables -A FORWARD -i $ethDev -o eth0 -m state --state RELATED,ESTABLISHED -j ACCEPT sudo iptables -A FORWARD -i eth0 -o $ethDev -j ACCEPT sudo iptables -t nat -I POSTROUTING -j SNAT -o $ethDev --to-source $ipNoMask ; - sudo arping -c 3 -I $ethDev -A -U -s $ipNoMask $ipNoMask; if [ $? -gt 0 -a $? -ne 2 ] then logger -t cloud "$(basename $0):Failed adding nat entry for ip $pubIp on interface $ethDev" return 1 fi logger -t cloud "$(basename $0):Added nat entry for ip $pubIp on interface $ethDev" + if [ $if_keep_state -ne 1 -o $old_state -ne 0 ] + then + sudo ip link set $ethDev up + sudo arping -c 3 -I $ethDev -A -U -s $ipNoMask $ipNoMask; + fi return 0 - } del_nat_entry() { @@ -79,10 +82,17 @@ add_an_ip () { local pubIp=$1 logger -t cloud "$(basename $0):Adding ip $pubIp on interface $ethDev" local ipNoMask=$(echo $1 | awk -F'/' '{print $1}') + sudo ip link show $ethDev | grep "state DOWN" > /dev/null + local old_state=$? - sudo ip link set $ethDev up sudo ip addr add dev $ethDev $pubIp ; - sudo arping -c 3 -I $ethDev -A -U -s $ipNoMask $ipNoMask; + + if [ $if_keep_state -ne 1 -o $old_state -ne 0 ] + then + sudo ip link set $ethDev up + sudo arping -c 3 -I $ethDev -A -U -s $ipNoMask $ipNoMask; + fi + return $? 
} @@ -127,6 +137,24 @@ fflag= cflag= op="" +is_master=0 +is_redundant=0 +if_keep_state=0 +sudo ls /root/keepalived.log > /dev/null 2>&1 +if [ $? -eq 0 ] +then + is_redundant=1 + sudo /root/checkrouter.sh|grep "Status: MASTER" > /dev/null 2>&1 + if [ $? -eq 0 ] + then + is_master=1 + fi +fi +if [ $is_redundant -eq 1 -a $is_master -ne 1 ] +then + if_keep_state=1 +fi + while getopts 'fADa:l:c:' OPTION do case $OPTION in diff --git a/patches/systemvm/debian/config/root/redundant_router/backup.sh b/patches/systemvm/debian/config/root/redundant_router/backup.sh new file mode 100644 index 00000000000..78875e08a8b --- /dev/null +++ b/patches/systemvm/debian/config/root/redundant_router/backup.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +LOCK=/tmp/rrouter.lock +locked=0 + +# Wait the lock +for i in `seq 1 5` +do + if [ ! -e $LOCK ] + then + touch $LOCK + locked=1 + break + fi + sleep 1 + echo sleep 1 +done + +if [ $locked -eq 0 ] +then + echo Status: fail to get the lock! >> /root/keepalived.log + exit +fi + +echo To backup called >> /root/keepalived.log +/root/redundant_router/disable_pubip.sh >> /root/keepalived.log 2>&1 +echo Disable public ip $? >> /root/keepalived.log +/root/redundant_router/primary-backup.sh backup >> /root/keepalived.log 2>&1 +echo Switch conntrackd mode backup $? >> /root/keepalived.log +echo Status: BACKUP >> /root/keepalived.log + +rm $LOCK diff --git a/patches/systemvm/debian/config/root/redundant_router/conntrackd.conf.templ b/patches/systemvm/debian/config/root/redundant_router/conntrackd.conf.templ new file mode 100644 index 00000000000..091de105625 --- /dev/null +++ b/patches/systemvm/debian/config/root/redundant_router/conntrackd.conf.templ @@ -0,0 +1,401 @@ +# +# Synchronizer settings +# +Sync { + Mode FTFW { + # + # Size of the resend queue (in objects). This is the maximum + # number of objects that can be stored waiting to be confirmed + # via acknoledgment. 
If you keep this value low, the daemon + # will have fewer chances to recover state-changes under message + # omission. On the other hand, if you keep this value high, + # the daemon will consume more memory to store dead objects. + # Default is 131072 objects. + # + # ResendQueueSize 131072 + + # + # This parameter allows you to set an initial fixed timeout + # for the committed entries when this node goes from backup + # to primary. This mechanism provides a way to purge entries + # that were not recovered appropriately after the specified + # fixed timeout. If you set a low value, TCP entries in + # Established states with no traffic may hang. For example, + # an SSH connection without KeepAlive enabled. If not set, + # the daemon uses an approximate timeout value calculation + # mechanism. By default, this option is not set. + # + # CommitTimeout 180 + + # + # If the firewall replica goes from primary to backup, + # the conntrackd -t command is invoked in the script. + # This command schedules a flush of the table in N seconds. + # This is useful to purge the connection tracking table of + # zombie entries and avoid clashes with old entries if you + # trigger several consecutive hand-overs. Default is 60 seconds. + # + # PurgeTimeout 60 + + # Set the acknowledgement window size. If you decrease this + # value, the number of acknowledgments increases. More + # acknowledgments means more overhead as conntrackd has to + # handle more control messages. On the other hand, if you + # increase this value, the resend queue gets more populated. + # This results in more overhead in the queue releasing. + # The following value is based on some practical experiments + # measuring the cycles spent by the acknowledgment handling + # with oprofile. If not set, default window size is 300. + # + # ACKWindowSize 300 + + # + # This clause allows you to disable the external cache. Thus, + # the state entries are directly injected into the kernel + # conntrack table. 
As a result, you save memory in user-space + # but you consume slots in the kernel conntrack table for + # backup state entries. Moreover, disabling the external cache + # means more CPU consumption. You need a Linux kernel + # >= 2.6.29 to use this feature. By default, this clause is + # set off. If you are installing conntrackd for the first time, + # please read the user manual and I encourage you to consider + # using the fail-over scripts instead of enabling this option! + # + # DisableExternalCache Off + } + + # + # Multicast IP and interface where messages are + # broadcasted (dedicated link). IMPORTANT: Make sure + # that iptables accepts traffic for destination + # 225.0.0.50, eg: + # + # iptables -I INPUT -d 225.0.0.50 -j ACCEPT + # iptables -I OUTPUT -d 225.0.0.50 -j ACCEPT + # + Multicast { + # + # Multicast address: The address that you use as destination + # in the synchronization messages. You do not have to add + # this IP to any of your existing interfaces. If any doubt, + # do not modify this value. + # + IPv4_address 225.0.0.50 + + # + # The multicast group that identifies the cluster. If any + # doubt, do not modify this value. + # + Group 3780 + + # + # IP address of the interface that you are going to use to + # send the synchronization messages. Remember that you must + # use a dedicated link for the synchronization messages. + # + IPv4_interface [LINK_IP] + + # + # The name of the interface that you are going to use to + # send the synchronization messages. + # + Interface [LINK_IF] + + # The multicast sender uses a buffer to enqueue the packets + # that are going to be transmitted. The default size of this + # socket buffer is available at /proc/sys/net/core/wmem_default. + # This value determines the chances to have an overrun in the + # sender queue. The overrun results in packet loss, thus, losing + # state information that would have to be retransmitted. 
If you + # notice some packet loss, you may want to increase the size + # of the sender buffer. The default size is usually around + # ~100 KBytes which is fairly small for busy firewalls. + # + SndSocketBuffer 1249280 + + # The multicast receiver uses a buffer to enqueue the packets + # that the socket is pending to handle. The default size of this + # socket buffer is available at /proc/sys/net/core/rmem_default. + # This value determines the chances to have an overrun in the + # receiver queue. The overrun results in packet loss, thus, losing + # state information that would have to be retransmitted. If you + # notice some packet loss, you may want to increase the size of + # the receiver buffer. The default size is usually around + # ~100 KBytes which is fairly small for busy firewalls. + # + RcvSocketBuffer 1249280 + + # + # Enable/Disable message checksumming. This is a good + # property to achieve fault-tolerance. In case of doubt, do + # not modify this value. + # + Checksum on + } + # + # You can specify more than one dedicated link. Thus, if one dedicated + # link fails, conntrackd can fail-over to another. Note that adding + # more than one dedicated link does not mean that state-updates will + # be sent to all of them. There is only one active dedicated link at + # a given moment. The `Default' keyword indicates that this interface + # will be selected as the initial dedicated link. You can have + # up to 4 redundant dedicated links. Note: Use different multicast + # groups for every redundant link. + # + # Multicast Default { + # IPv4_address 225.0.0.51 + # Group 3781 + # IPv4_interface 192.168.100.101 + # Interface eth3 + # # SndSocketBuffer 1249280 + # # RcvSocketBuffer 1249280 + # Checksum on + # } + + # + # You can use Unicast UDP instead of Multicast to propagate events. + # Note that you cannot use unicast UDP and Multicast at the same + # time, you can only select one. + # + # UDP { + # + # UDP address that this firewall uses to listen to events. 
+ # + # IPv4_address 192.168.2.100 + # + # or you may want to use an IPv6 address: + # + # IPv6_address fe80::215:58ff:fe28:5a27 + + # + # Destination UDP address that receives events, ie. the other + # firewall's dedicated link address. + # + # IPv4_Destination_Address 192.168.2.101 + # + # or you may want to use an IPv6 address: + # + # IPv6_Destination_Address fe80::2d0:59ff:fe2a:775c + + # + # UDP port used + # + # Port 3780 + + # + # The name of the interface that you are going to use to + # send the synchronization messages. + # + # Interface eth2 + + # + # The sender socket buffer size + # + # SndSocketBuffer 1249280 + + # + # The receiver socket buffer size + # + # RcvSocketBuffer 1249280 + + # + # Enable/Disable message checksumming. + # + # Checksum on + # } + +} + +# +# General settings +# +General { + # + # Set the nice value of the daemon, this value goes from -20 + # (most favorable scheduling) to 19 (least favorable). Using a + # very low value reduces the chances to lose state-change events. + # Default is 0 but this example file sets it to most favourable + # scheduling as this is generally a good idea. See man nice(1) for + # more information. + # + Nice -20 + + # + # Select a different scheduler for the daemon, you can select between + # RR and FIFO and the process priority (minimum is 0, maximum is 99). + # See man sched_setscheduler(2) for more information. Using a RT + # scheduler reduces the chances to overrun the Netlink buffer. + # + # Scheduler { + # Type FIFO + # Priority 99 + # } + + # + # Number of buckets in the cache hashtable. The bigger it is, + # the closer it gets to O(1) at the cost of consuming more memory. + # Read some documents about tuning hashtables for further reference. + # + HashSize 32768 + + # + # Maximum number of conntracks, it should be double of: + # $ cat /proc/sys/net/netfilter/nf_conntrack_max + # since the daemon may keep some dead entries cached for possible + # retransmission during state synchronization. 
+ # + HashLimit 131072 + + # + # Logfile: on (/var/log/conntrackd.log), off, or a filename + # Default: off + # + LogFile on + + # + # Syslog: on, off or a facility name (daemon (default) or local0..7) + # Default: off + # + #Syslog on + + # + # Lockfile + # + LockFile /var/lock/conntrack.lock + + # + # Unix socket configuration + # + UNIX { + Path /var/run/conntrackd.ctl + Backlog 20 + } + + # + # Netlink event socket buffer size. If you do not specify this clause, + # the default buffer size value in /proc/sys/net/core/rmem_default is + # used. This default value is usually around 100 Kbytes which is + # fairly small for busy firewalls. This leads to event message dropping + # and high CPU consumption. This example configuration file sets the + # size to 2 MBytes to avoid this sort of problem. + # + NetlinkBufferSize 2097152 + + # + # The daemon doubles the size of the netlink event socket buffer size + # if it detects netlink event message dropping. This clause sets the + # maximum buffer size growth that can be reached. This example file + # sets the size to 8 MBytes. + # + NetlinkBufferSizeMaxGrowth 8388608 + + # + # If the daemon detects that Netlink is dropping state-change events, + # it automatically schedules a resynchronization against the Kernel + # after 30 seconds (default value). Resynchronizations are expensive + # in terms of CPU consumption since the daemon has to get the full + # kernel state-table and purge state-entries that do not exist anymore. + # Be careful of setting a very small value here. You have the following + # choices: On (enabled, use default 30 seconds value), Off (disabled) + # or Value (in seconds, to set a specific amount of time). If not + # specified, the daemon assumes that this option is enabled. + # + # NetlinkOverrunResync On + + # + # If you want reliable event reporting over Netlink, set on this + # option. If you set on this clause, it is a good idea to set off + # NetlinkOverrunResync. 
This option is off by default and you need + # a Linux kernel >= 2.6.31. + # + # NetlinkEventsReliable Off + + # + # By default, the daemon receives state updates following an + # event-driven model. You can modify this behaviour by switching to + # polling mode with the PollSecs clause. This clause tells conntrackd + # to dump the states in the kernel every N seconds. With regard to + # synchronization mode, the polling mode can only guarantee that + # long-lifetime states are recovered. The main advantage of this method + # is the reduction in the state replication at the cost of reducing the + # chances of recovering connections. + # + # PollSecs 15 + + # + # The daemon prioritizes the handling of state-change events coming + # from the core. With this clause, you can set the maximum number of + # state-change events (those coming from kernel-space) that the daemon + # will handle after which it will handle other events coming from the + # network or userspace. A low value improves interactivity (in terms of + # real-time behaviour) at the cost of extra CPU consumption. + # Default (if not set) is 100. + # + # EventIterationLimit 100 + + # + # Event filtering: This clause allows you to filter certain traffic. + # There are currently three filter-sets: Protocol, Address and + # State. The filter is attached to an action that can be: Accept or + # Ignore. Thus, you can define the event filtering policy of the + # filter-sets in positive or negative logic depending on your needs. + # You can select if conntrackd filters the event messages from + # user-space or kernel-space. The kernel-space event filtering + # saves some CPU cycles by avoiding the copy of the event message + # from kernel-space to user-space. The kernel-space event filtering + # is preferred, however, you require a Linux kernel >= 2.6.29 to + # filter from kernel-space. If you want to select kernel-space + # event filtering, use the keyword 'Kernelspace' instead of + # 'Userspace'. 
+ # + Filter From Userspace { + # + # Accept only certain protocols: You may want to replicate + # the state of flows depending on their layer 4 protocol. + # + Protocol Accept { + TCP + SCTP + DCCP + # UDP + # ICMP # This requires a Linux kernel >= 2.6.31 + } + + # + # Ignore traffic for a certain set of IPs: Usually all the + # IPs assigned to the firewall since local traffic must be + # ignored, only forwarded connections are worth replicating. + # Note that these values depend on the local IPs that are + # assigned to the firewall. + # + Address Ignore { + IPv4_address 127.0.0.1 # loopback + IPv4_address [IGNORE_IP1] + IPv4_address [IGNORE_IP2] + IPv4_address [IGNORE_IP3] + #IPv4_address 192.168.0.100 # virtual IP 1 + #IPv4_address 192.168.1.100 # virtual IP 2 + #IPv4_address 192.168.0.1 + #IPv4_address 192.168.1.1 + #IPv4_address 192.168.100.100 # dedicated link ip + # + # You can also specify networks in format IP/cidr. + # IPv4_address 192.168.0.0/24 + # + # You can also specify an IPv6 address + # IPv6_address ::1 + } + + # + # Uncomment this line below if you want to filter by flow state. + # This option introduces a trade-off in the replication: it + # reduces CPU consumption at the cost of having lazy backup + # firewall replicas. The existing TCP states are: SYN_SENT, + # SYN_RECV, ESTABLISHED, FIN_WAIT, CLOSE_WAIT, LAST_ACK, + # TIME_WAIT, CLOSED, LISTEN. 
+ # + # State Accept { + # ESTABLISHED CLOSED TIME_WAIT CLOSE_WAIT for TCP + # } + } +} diff --git a/patches/systemvm/debian/config/root/redundant_router/disable_pubip.sh b/patches/systemvm/debian/config/root/redundant_router/disable_pubip.sh new file mode 100644 index 00000000000..9a9a2c6f72f --- /dev/null +++ b/patches/systemvm/debian/config/root/redundant_router/disable_pubip.sh @@ -0,0 +1,4 @@ +#!/bin/bash + +ifconfig eth2 down +service dnsmasq stop diff --git a/patches/systemvm/debian/config/root/redundant_router/enable_pubip.sh.templ b/patches/systemvm/debian/config/root/redundant_router/enable_pubip.sh.templ new file mode 100644 index 00000000000..93539422753 --- /dev/null +++ b/patches/systemvm/debian/config/root/redundant_router/enable_pubip.sh.templ @@ -0,0 +1,7 @@ +#!/bin/bash + +ifconfig eth2 down && \ +ifconfig eth2 hw ether [ETH2MAC] && \ +ifconfig eth2 up && \ +ip route add 0/0 via [GATEWAY] && \ +service dnsmasq restart diff --git a/patches/systemvm/debian/config/root/redundant_router/fault.sh b/patches/systemvm/debian/config/root/redundant_router/fault.sh new file mode 100644 index 00000000000..7e09fd58543 --- /dev/null +++ b/patches/systemvm/debian/config/root/redundant_router/fault.sh @@ -0,0 +1,6 @@ +#!/bin/bash + +echo To fault called >> /root/keepalived.log +/root/redundant_router/disable_pubip.sh >> /root/keepalived.log 2>&1 +/root/redundant_router/primary-backup.sh fault >> /root/keepalived.log 2>&1 +echo Status: FAULT >> /root/keepalived.log diff --git a/patches/systemvm/debian/config/root/redundant_router/keepalived.conf.templ b/patches/systemvm/debian/config/root/redundant_router/keepalived.conf.templ new file mode 100644 index 00000000000..ed59ba18c71 --- /dev/null +++ b/patches/systemvm/debian/config/root/redundant_router/keepalived.conf.templ @@ -0,0 +1,56 @@ +! 
Configuration File for keepalived + +global_defs { + router_id [ROUTER_ID] +} + +vrrp_sync_group VG_1 { + group { + inside_network # name of vrrp_instance (below) + outside_network # One for each moveable IP. + } + + # notify scripts and alerts are optional + # + # filenames of scripts to run on transitions + # can be unquoted (if just filename) + # or quoted (if has parameters) + # to MASTER transition + notify_master "/root/redundant_router/master.sh" + # to BACKUP transition + notify_backup "/root/redundant_router/backup.sh" + # FAULT transition + notify_fault "/root/redundant_router/fault.sh" + + # for ANY state transition. + # "notify" script is called AFTER the + # notify_* script(s) and is executed + # with 3 arguments provided by keepalived + # (ie don't include parameters in the notify line). + # arguments + # $1 = "GROUP"|"INSTANCE" + # $2 = name of group or instance + # $3 = target state of transition + # ("MASTER"|"BACKUP"|"FAULT") + #notify /root/redundant_router/notify.sh +} + + +vrrp_instance inside_network { + state BACKUP + interface eth0 + virtual_router_id 51 + priority [PRIORITY] + + advert_int 1 + authentication { + auth_type PASS + auth_pass WORD + } + + virtual_ipaddress { + [ROUTER_IP] brd [BOARDCAST] dev eth0 + } + + nopreempt +} diff --git a/patches/systemvm/debian/config/root/redundant_router/master.sh b/patches/systemvm/debian/config/root/redundant_router/master.sh new file mode 100644 index 00000000000..d17abfa9199 --- /dev/null +++ b/patches/systemvm/debian/config/root/redundant_router/master.sh @@ -0,0 +1,32 @@ +#!/bin/bash + +LOCK=/tmp/rrouter.lock +locked=0 + +# Wait the lock +for i in `seq 1 5` +do + if [ ! -e $LOCK ] + then + touch $LOCK + locked=1 + break + fi + sleep 1 + echo sleep 1 +done + +if [ $locked -eq 0 ] +then + echo Status: fail to get the lock! 
>> /root/keepalived.log + exit +fi + +echo To master called >> /root/keepalived.log +/root/redundant_router/enable_pubip.sh >> /root/keepalived.log 2>&1 +echo Enable public ip $? >> /root/keepalived.log +/root/redundant_router/primary-backup.sh primary >> /root/keepalived.log 2>&1 +echo Switch conntrackd mode primary $? >> /root/keepalived.log +echo Status: MASTER >> /root/keepalived.log + +rm $LOCK diff --git a/patches/systemvm/debian/config/root/redundant_router/primary-backup.sh b/patches/systemvm/debian/config/root/redundant_router/primary-backup.sh new file mode 100644 index 00000000000..c53fe2e2a18 --- /dev/null +++ b/patches/systemvm/debian/config/root/redundant_router/primary-backup.sh @@ -0,0 +1,128 @@ +#!/bin/sh +# +# (C) 2008 by Pablo Neira Ayuso +# +# This software may be used and distributed according to the terms +# of the GNU General Public License, incorporated herein by reference. +# +# Description: +# +# This is the script for primary-backup setups for keepalived +# (http://www.keepalived.org). You may adapt it to make it work with other +# high-availability managers. +# +# Do not forget to include the required modifications to your keepalived.conf +# file to invoke this script during keepalived's state transitions. +# +# Contributions to improve this script are welcome :). +# + +CONNTRACKD_BIN=/usr/sbin/conntrackd +CONNTRACKD_LOCK=/var/lock/conntrack.lock +CONNTRACKD_CONFIG=/etc/conntrackd/conntrackd.conf +CONNTRACKD_LOG=/root/keepalived.log + +case "$1" in + primary) + # + # commit the external cache into the kernel table + # + $CONNTRACKD_BIN -C $CONNTRACKD_CONFIG -c + if [ $? -eq 1 ] + then + logger "ERROR: failed to invoke conntrackd -c" + fi + + # + # flush the internal and the external caches + # + $CONNTRACKD_BIN -C $CONNTRACKD_CONFIG -f + if [ $? -eq 1 ] + then + logger "ERROR: failed to invoke conntrackd -f" + fi + + # + # resynchronize my internal cache to the kernel table + # + $CONNTRACKD_BIN -C $CONNTRACKD_CONFIG -R + if [ $? 
-eq 1 ] + then + logger "ERROR: failed to invoke conntrackd -R" + fi + + # + # send a bulk update to backups + # + $CONNTRACKD_BIN -C $CONNTRACKD_CONFIG -B + if [ $? -eq 1 ] + then + logger "ERROR: failed to invoke conntrackd -B" + fi + echo Conntrackd switch to primary done >> $CONNTRACKD_LOG + ;; + backup) + # + # is conntrackd running? request some statistics to check it + # + $CONNTRACKD_BIN -C $CONNTRACKD_CONFIG -s + if [ $? -eq 1 ] + then + # + # something's wrong, do we have a lock file? + # + if [ -f $CONNTRACKD_LOCK ] + then + logger "WARNING: conntrackd was not cleanly stopped." + logger "If you suspect that it has crashed:" + logger "1) Enable coredumps" + logger "2) Try to reproduce the problem" + logger "3) Post the coredump to netfilter-devel@vger.kernel.org" + rm -f $CONNTRACKD_LOCK + fi + $CONNTRACKD_BIN -C $CONNTRACKD_CONFIG -d + if [ $? -eq 1 ] + then + logger "ERROR: cannot launch conntrackd" + exit 1 + fi + fi + # + # shorten kernel conntrack timers to remove the zombie entries. + # + $CONNTRACKD_BIN -C $CONNTRACKD_CONFIG -t + if [ $? -eq 1 ] + then + logger "ERROR: failed to invoke conntrackd -t" + fi + + # + # request resynchronization with master firewall replica (if any) + # Note: this does nothing in the alarm approach. + # + $CONNTRACKD_BIN -C $CONNTRACKD_CONFIG -n + if [ $? -eq 1 ] + then + logger "ERROR: failed to invoke conntrackd -n" + fi + echo Conntrackd switch to backup done >> $CONNTRACKD_LOG + ;; + fault) + # + # shorten kernel conntrack timers to remove the zombie entries. + # + $CONNTRACKD_BIN -C $CONNTRACKD_CONFIG -t + if [ $? 
-eq 1 ] + then + logger "ERROR: failed to invoke conntrackd -t" + fi + echo Conntrackd switch to fault done >> $CONNTRACKD_LOG + ;; + *) + logger "conntrackd: ERROR: unknown state transition: " $1 + echo "Usage: primary-backup.sh {primary|backup|fault}" + exit 1 + ;; +esac + +exit 0 diff --git a/scripts/network/domr/getRouterStatus.sh b/scripts/network/domr/getRouterStatus.sh new file mode 100644 index 00000000000..1239d11d24a --- /dev/null +++ b/scripts/network/domr/getRouterStatus.sh @@ -0,0 +1,39 @@ +#!/bin/bash + +usage() { + printf "Usage:\n %s \n" $(basename $0) >&2 + printf " %s \n" $(basename $0) >&2 +} + +cert="/root/.ssh/id_rsa.cloud" +domRIp=$1 +shift + +# check if gateway domain is up and running +check_gw() { + ping -c 1 -n -q $1 > /dev/null + if [ $? -gt 0 ] + then + sleep 1 + ping -c 1 -n -q $1 > /dev/null + fi + return $?; +} + + +# Check if DomR is up and running. If not, exit with error code 1. +check_gw "$domRIp" +if [ $? -gt 0 ] +then + exit 1 +fi + +tmpfile=/tmp/$RANDOM.log + +scp -P 3922 -q -o StrictHostKeyChecking=no -i $cert root@$domRIp:/root/keepalived.log $tmpfile +if [ $? -ne 0 ] +then + rm -f $tmpfile; exit 1 # inside "then", $? is the status of the [ test (always 0), so fail explicitly and drop any partial copy 
+fi +result=`tail $tmpfile -n 1`; rm -f $tmpfile # last log line is the router status; remove the temp copy so /tmp does not accumulate files +echo $result diff --git a/server/src/com/cloud/configuration/Config.java b/server/src/com/cloud/configuration/Config.java index f80c90560b8..107104c5b0b 100755 --- a/server/src/com/cloud/configuration/Config.java +++ b/server/src/com/cloud/configuration/Config.java @@ -65,6 +65,7 @@ public enum Config { OvsNetwork("Network", ManagementServer.class, Boolean.class, "open.vswitch.vlan.network", "false", "enable/disable vlan remapping of open vswitch network", null), OvsTunnelNetwork("Network", ManagementServer.class, Boolean.class, "open.vswitch.tunnel.network", "false", "enable/disable open vswitch tunnel network(no vlan)", null), VmNetworkThrottlingRate("Network", ManagementServer.class, Integer.class, "vm.network.throttling.rate", "200", "Default data transfer rate in megabits per second allowed in User vm's default network.", null), + RedundantRouter("Network", ManagementServer.class, Boolean.class, "network.redundantrouter", "false", "enable/disable redundant virtual router", null), // Usage CapacityCheckPeriod("Usage", ManagementServer.class, Integer.class, "capacity.check.period", "300000", "The interval in milliseconds between capacity checks", null), diff --git a/server/src/com/cloud/ha/UserVmDomRInvestigator.java b/server/src/com/cloud/ha/UserVmDomRInvestigator.java index 4c982c9b3a6..24a7f0f8cf7 100644 --- a/server/src/com/cloud/ha/UserVmDomRInvestigator.java +++ b/server/src/com/cloud/ha/UserVmDomRInvestigator.java @@ -78,15 +78,22 @@ public class UserVmDomRInvestigator extends AbstractInvestigatorImpl { continue; } - VirtualRouter router = _vnaMgr.getRouterForNetwork(nic.getNetworkId()); - if (router == null) { + List routers = _vnaMgr.getRoutersForNetwork(nic.getNetworkId()); + if (routers == null || routers.isEmpty()) { if (s_logger.isDebugEnabled()) { s_logger.debug("Unable to find a router in network " + nic.getNetworkId() + " to ping " + vm); } continue; } - Boolean result = testUserVM(vm, nic, router); + Boolean 
result = null; + for (VirtualRouter router : routers) { + result = testUserVM(vm, nic, router); + if (result != null) { + break; + } + } + if (result == null) { continue; } diff --git a/server/src/com/cloud/network/NetworkManagerImpl.java b/server/src/com/cloud/network/NetworkManagerImpl.java index 5a64dcba743..3b176ffa8a3 100755 --- a/server/src/com/cloud/network/NetworkManagerImpl.java +++ b/server/src/com/cloud/network/NetworkManagerImpl.java @@ -30,6 +30,7 @@ import java.util.List; import java.util.Map; import java.util.Random; import java.util.Set; +import java.util.Random; import java.util.TreeSet; import java.util.concurrent.Executors; import java.util.concurrent.ScheduledExecutorService; @@ -3022,7 +3023,6 @@ public class NetworkManagerImpl implements NetworkManager, NetworkService, Manag return result; } - protected String getZoneNetworkDomain(long zoneId) { return _dcDao.findById(zoneId).getDomain(); } diff --git a/server/src/com/cloud/network/element/DhcpElement.java b/server/src/com/cloud/network/element/DhcpElement.java index 4540115c9f4..092f74588f2 100644 --- a/server/src/com/cloud/network/element/DhcpElement.java +++ b/server/src/com/cloud/network/element/DhcpElement.java @@ -131,20 +131,28 @@ public class DhcpElement extends AdapterBase implements NetworkElement, Password @Override public boolean shutdown(Network network, ReservationContext context) throws ConcurrentOperationException, ResourceUnavailableException { - DomainRouterVO router = _routerDao.findByNetwork(network.getId()); - if (router == null) { + List routers = _routerDao.findByNetwork(network.getId()); + if (routers == null || routers.isEmpty()) { return true; } - return (_routerMgr.stop(router, false, context.getCaller(), context.getAccount()) != null); + boolean result = true; + for (DomainRouterVO router : routers) { + result = (_routerMgr.stop(router, false, context.getCaller(), context.getAccount()) != null) && result; /* call stop() before '&&' so one failed stop does not skip the remaining routers */ + } + return result; } @Override public boolean destroy(Network 
config) throws ConcurrentOperationException, ResourceUnavailableException{ - DomainRouterVO router = _routerDao.findByNetwork(config.getId()); - if (router == null) { + List routers = _routerDao.findByNetwork(config.getId()); + if (routers == null || routers.isEmpty()) { return true; } - return _routerMgr.destroyRouter(router.getId()); + boolean result = true; + for (DomainRouterVO router : routers) { + result = _routerMgr.destroyRouter(router.getId()) && result; /* call destroyRouter() before '&&' so one failure does not skip the remaining routers */ + } + return result; } @Override @@ -186,29 +194,30 @@ public class DhcpElement extends AdapterBase implements NetworkElement, Password DataCenter dc = _configMgr.getZone(network.getDataCenterId()); NetworkOffering offering = _configMgr.getNetworkOffering(network.getNetworkOfferingId()); DeployDestination dest = new DeployDestination(dc, null, null, null); - DomainRouterVO router = _routerDao.findByNetwork(network.getId()); - if (router == null) { + List routers = _routerDao.findByNetwork(network.getId()); + if (routers == null || routers.isEmpty()) { s_logger.trace("Can't find dhcp element in network " + network.getId()); return true; } VirtualRouter result = null; - if (canHandle(network.getGuestType(), dest, offering.getTrafficType())) { - if (router.getState() == State.Stopped) { - result = _routerMgr.startRouter(router.getId(), false); + boolean ret = true; + for (DomainRouterVO router : routers) { + if (canHandle(network.getGuestType(), dest, offering.getTrafficType())) { + if (router.getState() == State.Stopped) { + result = _routerMgr.startRouter(router.getId(), false); + } else { + result = _routerMgr.rebootRouter(router.getId(), false); + } + if (result == null) { + s_logger.warn("Failed to restart dhcp element " + router + " as a part of netowrk " + network + " restart"); + ret = false; + } } else { - result = _routerMgr.rebootRouter(router.getId(), false); + s_logger.trace("Dhcp element doesn't handle network restart for the network " + network); } - if (result == null) { - 
s_logger.warn("Failed to restart dhcp element " + router + " as a part of netowrk " + network + " restart"); - return false; - } else { - return true; - } - } else { - s_logger.trace("Dhcp element doesn't handle network restart for the network " + network); - return true; } + return ret; } @Override diff --git a/server/src/com/cloud/network/element/VirtualRouterElement.java b/server/src/com/cloud/network/element/VirtualRouterElement.java index 3fe46b969f2..1fe295866b2 100644 --- a/server/src/com/cloud/network/element/VirtualRouterElement.java +++ b/server/src/com/cloud/network/element/VirtualRouterElement.java @@ -123,29 +123,30 @@ public class VirtualRouterElement extends DhcpElement implements NetworkElement public boolean restart(Network network, ReservationContext context) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException{ DataCenter dc = _configMgr.getZone(network.getDataCenterId()); DeployDestination dest = new DeployDestination(dc, null, null, null); - DomainRouterVO router = _routerDao.findByNetwork(network.getId()); - if (router == null) { + List routers = _routerDao.findByNetwork(network.getId()); + if (routers == null || routers.isEmpty()) { s_logger.trace("Can't find virtual router element in network " + network.getId()); return true; } - + VirtualRouter result = null; - if (canHandle(network.getGuestType(), dest.getDataCenter())) { - if (router.getState() == State.Stopped) { - result = _routerMgr.startRouter(router.getId(), false); + boolean ret = true; + for (DomainRouterVO router : routers) { + if (canHandle(network.getGuestType(), dest.getDataCenter())) { + if (router.getState() == State.Stopped) { + result = _routerMgr.startRouter(router.getId(), false); + } else { + result = _routerMgr.rebootRouter(router.getId(), false); + } + if (result == null) { + s_logger.warn("Failed to restart virtual router element " + router + " as a part of netowrk " + network + " restart"); + ret = false; + } } else { - 
result = _routerMgr.rebootRouter(router.getId(), false); + s_logger.trace("Virtual router element doesn't handle network restart for the network " + network); } - if (result == null) { - s_logger.warn("Failed to restart virtual router element " + router + " as a part of netowrk " + network + " restart"); - return false; - } else { - return true; - } - } else { - s_logger.trace("Virtual router element doesn't handle network restart for the network " + network); - return true; } + return ret; } @Override @@ -154,8 +155,8 @@ public class VirtualRouterElement extends DhcpElement implements NetworkElement DataCenter dc = _configMgr.getZone(config.getDataCenterId()); if (canHandle(config.getGuestType(),dc)) { long networkId = config.getId(); - DomainRouterVO router = _routerDao.findByNetwork(networkId); - if (router == null) { + List routers = _routerDao.findByNetwork(networkId); + if (routers == null || routers.isEmpty()) { s_logger.debug("Virtual router elemnt doesn't need to apply firewall rules on the backend; virtual router doesn't exist in the network " + config.getId()); return true; } @@ -171,8 +172,8 @@ public class VirtualRouterElement extends DhcpElement implements NetworkElement DataCenter dc = _configMgr.getZone(network.getDataCenterId()); if (canHandle(network.getGuestType(),dc)) { - DomainRouterVO router = _routerDao.findByNetwork(network.getId()); - if (router == null) { + List routers = _routerDao.findByNetwork(network.getId()); + if (routers == null || routers.isEmpty()) { s_logger.debug("Virtual router elemnt doesn't need to associate ip addresses on the backend; virtual router doesn't exist in the network " + network.getId()); return true; } diff --git a/server/src/com/cloud/network/guru/GuestNetworkGuru.java b/server/src/com/cloud/network/guru/GuestNetworkGuru.java index 80c2416bdb6..0678ce4189e 100644 --- a/server/src/com/cloud/network/guru/GuestNetworkGuru.java +++ b/server/src/com/cloud/network/guru/GuestNetworkGuru.java @@ -17,11 +17,6 @@ */ 
package com.cloud.network.guru; -import java.util.List; -import java.util.Random; -import java.util.Set; -import java.util.TreeSet; - import javax.ejb.Local; import org.apache.log4j.Logger; @@ -79,7 +74,6 @@ public class GuestNetworkGuru extends AdapterBase implements NetworkGuru { String _defaultGateway; String _defaultCidr; - Random _rand = new Random(System.currentTimeMillis()); protected GuestNetworkGuru() { super(); @@ -222,25 +216,6 @@ public class GuestNetworkGuru extends AdapterBase implements NetworkGuru { } } - @DB - protected String acquireGuestIpAddress(Network network) { - List ips = _nicDao.listIpAddressInNetwork(network.getId()); - String[] cidr = network.getCidr().split("/"); - Set allPossibleIps = NetUtils.getAllIpsFromCidr(cidr[0], Integer.parseInt(cidr[1])); - Set usedIps = new TreeSet(); - for (String ip : ips) { - usedIps.add(NetUtils.ip2Long(ip)); - } - if (usedIps.size() != 0) { - allPossibleIps.removeAll(usedIps); - } - if (allPossibleIps.isEmpty()) { - return null; - } - Long[] array = allPossibleIps.toArray(new Long[allPossibleIps.size()]); - return NetUtils.long2Ip(array[_rand.nextInt(array.length)]); - } - @Override public void reserve(NicProfile nic, Network network, VirtualMachineProfile vm, DeployDestination dest, ReservationContext context) throws InsufficientVirtualNetworkCapcityException, InsufficientAddressCapacityException { diff --git a/server/src/com/cloud/network/ovs/OvsNetworkManagerImpl.java b/server/src/com/cloud/network/ovs/OvsNetworkManagerImpl.java index c8aa7e3b0b1..cd27ce9a100 100644 --- a/server/src/com/cloud/network/ovs/OvsNetworkManagerImpl.java +++ b/server/src/com/cloud/network/ovs/OvsNetworkManagerImpl.java @@ -395,10 +395,10 @@ public class OvsNetworkManagerImpl implements OvsNetworkManager { long hostId = dest.getHost().getId(); long accountId = instance.getAccountId(); Listvms = _userVmDao.listByAccountId(accountId); - DomainRouterVO router = _routerDao.findBy(accountId, instance.getDataCenterIdToDeployIn()); 
+ List routers = _routerDao.findBy(accountId, instance.getDataCenterIdToDeployIn()); Listins = new ArrayList(); ins.addAll(vms); - ins.add(router); + ins.addAll(routers); ListtoHostIds = new ArrayList(); ListfromHostIds = new ArrayList(); @@ -538,8 +538,8 @@ public class OvsNetworkManagerImpl implements OvsNetworkManager { } long accountId = instance.getAccountId(); - DomainRouterVO router = _routerDao.findBy(accountId, instance.getDataCenterIdToDeployIn()); - if (router == null) { + List routers = _routerDao.findBy(accountId, instance.getDataCenterIdToDeployIn()); + if (routers.size() == 0) { return; } @@ -547,18 +547,20 @@ public class OvsNetworkManagerImpl implements OvsNetworkManager { return; } - try { - long hostId = router.getHostId(); - String tag = Long.toString(_vlanMappingDao.findByAccountIdAndHostId(accountId, hostId).getVlan()); - VmFlowLogVO log = _flowLogDao.findOrNewByVmId(instance.getId(), instance.getHostName()); - String vlans = getVlanInPortMapping(accountId, hostId); - s_logger.debug("ask router " + router.getHostName() + " on host " - + hostId + " update vlan map to " + vlans); - Commands cmds = new Commands(new OvsSetTagAndFlowCommand( - router.getHostName(), tag, vlans, Long.toString(log.getLogsequence()), instance.getId())); - _agentMgr.send(router.getHostId(), cmds, _ovsListener); - } catch (Exception e) { - s_logger.warn("apply flow to router failed", e); + for (DomainRouterVO router : routers) { + try { + long hostId = router.getHostId(); + String tag = Long.toString(_vlanMappingDao.findByAccountIdAndHostId(accountId, hostId).getVlan()); + VmFlowLogVO log = _flowLogDao.findOrNewByVmId(instance.getId(), instance.getHostName()); + String vlans = getVlanInPortMapping(accountId, hostId); + s_logger.debug("ask router " + router.getHostName() + " on host " + + hostId + " update vlan map to " + vlans); + Commands cmds = new Commands(new OvsSetTagAndFlowCommand( + router.getHostName(), tag, vlans, Long.toString(log.getLogsequence()), 
instance.getId())); + _agentMgr.send(router.getHostId(), cmds, _ovsListener); + } catch (Exception e) { + s_logger.warn("apply flow to router failed", e); + } } } @@ -639,9 +641,11 @@ public class OvsNetworkManagerImpl implements OvsNetworkManager { } if (tellRouter && instance.getType() != VirtualMachine.Type.DomainRouter) { - DomainRouterVO router = _routerDao.findBy(accountId, instance.getDataCenterIdToDeployIn()); - if (router != null) { - affectedVms.add(new Long(router.getId())); + List routers = _routerDao.findBy(accountId, instance.getDataCenterIdToDeployIn()); + for (DomainRouterVO router : routers) { + if (router != null) { + affectedVms.add(new Long(router.getId())); + } } } return affectedVms; diff --git a/server/src/com/cloud/network/ovs/OvsTunnelManagerImpl.java b/server/src/com/cloud/network/ovs/OvsTunnelManagerImpl.java index 209c23032d2..3215db198d5 100644 --- a/server/src/com/cloud/network/ovs/OvsTunnelManagerImpl.java +++ b/server/src/com/cloud/network/ovs/OvsTunnelManagerImpl.java @@ -153,13 +153,13 @@ public class OvsTunnelManagerImpl implements OvsTunnelManager { long hostId = dest.getHost().getId(); long accountId = instance.getAccountId(); Listvms = _userVmDao.listByAccountId(accountId); - DomainRouterVO router = _routerDao.findBy(accountId, instance.getDataCenterIdToDeployIn()); + List routers = _routerDao.findBy(accountId, instance.getDataCenterIdToDeployIn()); Listins = new ArrayList(); if (vms != null) { ins.addAll(vms); } - if (router != null) { - ins.add(router); + if (routers.size() != 0) { + ins.addAll(routers); } List>toHosts = new ArrayList>(); List>fromHosts = new ArrayList>(); @@ -284,9 +284,11 @@ public class OvsTunnelManagerImpl implements OvsTunnelManager { return; } - DomainRouterVO router = _routerDao.findBy(vm.getAccountId(), vm.getDataCenterIdToDeployIn()); - if (router.getHostId() == vm.getHostId()) { - return; + List routers = _routerDao.findBy(vm.getAccountId(), vm.getDataCenterIdToDeployIn()); + for (DomainRouterVO 
router : routers) { + if (router.getHostId() == vm.getHostId()) { + return; + } } } else if (vm.getType() == VirtualMachine.Type.DomainRouter && userVms.size() != 0) { return; diff --git a/server/src/com/cloud/network/router/VirtualNetworkApplianceManager.java b/server/src/com/cloud/network/router/VirtualNetworkApplianceManager.java index bc65cb4f1d7..8bd1b76b53f 100644 --- a/server/src/com/cloud/network/router/VirtualNetworkApplianceManager.java +++ b/server/src/com/cloud/network/router/VirtualNetworkApplianceManager.java @@ -32,6 +32,7 @@ import com.cloud.user.Account; import com.cloud.user.User; import com.cloud.uservm.UserVm; import com.cloud.utils.component.Manager; +import com.cloud.vm.DomainRouterVO; import com.cloud.vm.NicProfile; import com.cloud.vm.ReservationContext; import com.cloud.vm.VirtualMachineProfile; @@ -63,19 +64,18 @@ public interface VirtualNetworkApplianceManager extends Manager, VirtualNetworkA boolean getRouterStatistics(long vmId, Map netStats, Map diskStats); - VirtualRouter getRouter(long accountId, long zoneId); + List getRouters(long accountId, long zoneId); - VirtualRouter deployVirtualRouter(Network guestNetwork, DeployDestination dest, Account owner, Map params) throws InsufficientCapacityException, ResourceUnavailableException, ConcurrentOperationException; + List deployVirtualRouter(Network guestNetwork, DeployDestination dest, Account owner, Map params) throws InsufficientCapacityException, ResourceUnavailableException, ConcurrentOperationException; + List deployDhcp(Network guestNetwork, DeployDestination dest, Account owner, Map params) throws InsufficientCapacityException, ResourceUnavailableException, ConcurrentOperationException; - VirtualRouter deployDhcp(Network guestNetwork, DeployDestination dest, Account owner, Map params) throws InsufficientCapacityException, ResourceUnavailableException, ConcurrentOperationException; - - VirtualRouter addVirtualMachineIntoNetwork(Network config, NicProfile nic, VirtualMachineProfile 
vm, DeployDestination dest, ReservationContext context, Boolean startDhcp) throws ConcurrentOperationException, InsufficientCapacityException, ResourceUnavailableException; + List addVirtualMachineIntoNetwork(Network config, NicProfile nic, VirtualMachineProfile vm, DeployDestination dest, ReservationContext context, Boolean startDhcp) throws ConcurrentOperationException, InsufficientCapacityException, ResourceUnavailableException; boolean associateIP (Network network, List ipAddress) throws ResourceUnavailableException; boolean applyFirewallRules(Network network, List rules) throws ResourceUnavailableException; - VirtualRouter getRouterForNetwork(long networkId); + List getRoutersForNetwork(long networkId); VirtualRouter stop(VirtualRouter router, boolean forced, User callingUser, Account callingAccount) throws ConcurrentOperationException, ResourceUnavailableException; } diff --git a/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java b/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java index 52940e1df1c..5f60e25f5b0 100755 --- a/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java +++ b/server/src/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java @@ -35,6 +35,8 @@ import org.apache.log4j.Logger; import com.cloud.agent.AgentManager; import com.cloud.agent.AgentManager.OnError; import com.cloud.agent.api.Answer; +import com.cloud.agent.api.CheckRouterAnswer; +import com.cloud.agent.api.CheckRouterCommand; import com.cloud.agent.api.ModifySshKeysCommand; import com.cloud.agent.api.NetworkUsageAnswer; import com.cloud.agent.api.NetworkUsageCommand; @@ -60,6 +62,7 @@ import com.cloud.api.commands.StartRouterCmd; import com.cloud.api.commands.UpgradeRouterCmd; import com.cloud.async.AsyncJobManager; import com.cloud.capacity.dao.CapacityDao; +import com.cloud.cluster.ManagementServerNode; import com.cloud.configuration.Config; import com.cloud.configuration.ConfigurationManager; 
import com.cloud.configuration.dao.ConfigurationDao; @@ -86,6 +89,7 @@ import com.cloud.exception.OperationTimedoutException; import com.cloud.exception.PermissionDeniedException; import com.cloud.exception.ResourceUnavailableException; import com.cloud.exception.StorageUnavailableException; +import com.cloud.host.HostVO; import com.cloud.host.dao.HostDao; import com.cloud.hypervisor.Hypervisor.HypervisorType; import com.cloud.network.IPAddressVO; @@ -274,15 +278,17 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian String _mgmt_cidr; int _routerStatsInterval = 300; + int _checkRouterInterval = 30; private ServiceOfferingVO _offering; private String trafficSentinelHostname; ScheduledExecutorService _executor; + ScheduledExecutorService _checkExecutor; Account _systemAcct; @Override - public DomainRouterVO getRouter(long accountId, long dataCenterId) { + public List getRouters(long accountId, long dataCenterId) { return _routerDao.findBy(accountId, dataCenterId); } @@ -374,8 +380,8 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian @Override public boolean savePasswordToRouter(Network network, NicProfile nic, VirtualMachineProfile profile) throws ResourceUnavailableException { - DomainRouterVO router = _routerDao.findByNetwork(network.getId()); - if (router == null) { + List routers = _routerDao.findByNetwork(network.getId()); + if (routers == null || routers.isEmpty()) { s_logger.warn("Unable save password, router doesn't exist in network " + network.getId()); throw new CloudRuntimeException("Unable to save password to router"); } @@ -384,18 +390,17 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian String password = (String) profile.getParameter(Param.VmPassword); String encodedPassword = PasswordGenerator.rot13(password); - Commands cmds = new Commands(OnError.Continue); - SavePasswordCommand cmd = new SavePasswordCommand(encodedPassword, nic.getIp4Address(), 
userVm.getHostName()); - cmd.setAccessDetail(NetworkElementCommand.ROUTER_IP, router.getPrivateIpAddress()); - cmd.setAccessDetail(NetworkElementCommand.ROUTER_GUEST_IP, router.getGuestIpAddress()); - cmd.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName()); + boolean result = true; + for (DomainRouterVO router : routers) { + Commands cmds = new Commands(OnError.Continue); + SavePasswordCommand cmd = new SavePasswordCommand(encodedPassword, nic.getIp4Address(), userVm.getHostName()); + cmd.setAccessDetail(NetworkElementCommand.ROUTER_IP, router.getPrivateIpAddress()); + cmd.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName()); + cmds.addCommand("password", cmd); - DataCenterVO dcVo = _dcDao.findById(router.getDataCenterIdToDeployIn()); - cmd.setAccessDetail(NetworkElementCommand.ZONE_NETWORK_TYPE, dcVo.getNetworkType().toString()); - - cmds.addCommand("password", cmd); - - return sendCommandsToRouter(router, cmds); + result = result && sendCommandsToRouter(router, cmds); + } + return result; } @Override @ActionEvent(eventType = EventTypes.EVENT_ROUTER_STOP, eventDescription = "stopping router Vm", async = true) @@ -518,6 +523,7 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian _name = name; _executor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("RouterMonitor")); + _checkExecutor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("RouterStatusMonitor")); final ComponentLocator locator = ComponentLocator.getCurrentLocator(); @@ -576,6 +582,7 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian @Override public boolean start() { _executor.scheduleAtFixedRate(new NetworkUsageTask(), _routerStatsInterval, _routerStatsInterval, TimeUnit.SECONDS); + _checkExecutor.scheduleAtFixedRate(new CheckRouterTask(), _checkRouterInterval, _checkRouterInterval, TimeUnit.SECONDS); return true; } @@ -688,12 +695,74 @@ public class 
VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian } } + protected class CheckRouterTask implements Runnable { + + public CheckRouterTask() { + } + + @Override + public void run() { + + final List routers = _routerDao.listVirtualUpByHostId(null); + s_logger.debug("Found " + routers.size() + " running routers. "); + + for (DomainRouterVO router : routers) { + if (!router.getIsRedundantRouter()) { + continue; + } + String privateIP = router.getPrivateIpAddress(); + HostVO host = _hostDao.findById(router.getHostId()); + /* Only cover hosts managed by this management server; a missing host record is skipped so that an exception cannot escape run() and suppress all later scheduled checks */ + if (host == null || host.getManagementServerId() != ManagementServerNode.getManagementServerId()) { + continue; + } + if (privateIP != null) { + final CheckRouterCommand command = new CheckRouterCommand(); + command.setAccessDetail(NetworkElementCommand.ROUTER_IP, router.getPrivateIpAddress()); + command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName()); + final Answer answer = _agentMgr.easySend(router.getHostId(), command); + if (answer instanceof CheckRouterAnswer) { + if (answer.getResult()) { + router.setIsMaster(((CheckRouterAnswer) answer).getIsMaster()); + } else { + router.setIsMaster(false); + } + } else { + router.setIsMaster(false); + } + Transaction txn = Transaction.open(Transaction.CLOUD_DB); + try { + txn.start(); + _routerDao.update(router.getId(), router); + txn.commit(); + } catch (Exception e) { + txn.rollback(); + s_logger.warn("Unable to update router status for account: " + router.getAccountId(), e); + } finally { + txn.close(); + } + } + } + } + } + public static boolean isAdmin(short accountType) { return ((accountType == Account.ACCOUNT_TYPE_ADMIN) || (accountType == Account.ACCOUNT_TYPE_DOMAIN_ADMIN) || (accountType == Account.ACCOUNT_TYPE_READ_ONLY_ADMIN) || (accountType == Account.ACCOUNT_TYPE_RESOURCE_DOMAIN_ADMIN)); } @DB - protected DomainRouterVO createVirtualRouter(Network guestNetwork, DataCenterDeployment plan, HypervisorType type, Account owner) 
throws ConcurrentOperationException, InsufficientCapacityException { + protected List findOrCreateVirtualRouter(Network guestNetwork, DataCenterDeployment plan, HypervisorType type, Account owner) throws ConcurrentOperationException, InsufficientCapacityException { + List routers = _routerDao.findByNetwork(guestNetwork.getId()); + + boolean isRedundant = "true".equals(_configDao.getValue("network.redundantrouter")); + + int router_nr = 1; + if (isRedundant) { + router_nr = 2; + } + if (routers.size() >= router_nr) { + return routers; + } /* Before starting router, already know the hypervisor type */ VMTemplateVO template = _templateDao.findRoutingTemplate(type); @@ -702,7 +771,6 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian NetworkOfferingVO controlOffering = offerings.get(0); NetworkVO controlConfig = _networkMgr.setupNetwork(_systemAcct, controlOffering, plan, null, null, false, false).get(0); - List> networks = new ArrayList>(3); NetworkOfferingVO publicOffering = _networkMgr.getSystemAccountNetworkOfferings(NetworkOfferingVO.SystemPublicNetwork).get(0); List publicNetworks = _networkMgr.setupNetwork(_systemAcct, publicOffering, plan, null, null, false, false); @@ -712,19 +780,15 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian throw new ConcurrentOperationException("Unable to acquire lock on " + guestNetwork.getId()); } - DomainRouterVO router = null; try { txn.start(); - router = _routerDao.findByNetwork(guestNetwork.getId()); - if (router != null) { - return router; + routers = _routerDao.findByNetwork(guestNetwork.getId()); + if (routers.size() >= router_nr) { + return routers; } - long id = _routerDao.getNextInSequence(Long.class, "id"); - if (s_logger.isDebugEnabled()) { - s_logger.debug("Creating the router " + id); - } + int count = router_nr - routers.size(); PublicIp sourceNatIp = _networkMgr.assignSourceNatIpAddress(owner, guestNetwork, _accountService.getSystemUser().getId()); 
NicProfile defaultNic = new NicProfile(); @@ -737,31 +801,55 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian defaultNic.setBroadcastUri(BroadcastDomainType.Vlan.toUri(sourceNatIp.getVlanTag())); defaultNic.setIsolationUri(IsolationType.Vlan.toUri(sourceNatIp.getVlanTag())); defaultNic.setDeviceId(2); - networks.add(new Pair(publicNetworks.get(0), defaultNic)); - NicProfile gatewayNic = new NicProfile(); - - gatewayNic.setIp4Address(guestNetwork.getGateway()); - gatewayNic.setBroadcastUri(guestNetwork.getBroadcastUri()); - gatewayNic.setBroadcastType(guestNetwork.getBroadcastDomainType()); - gatewayNic.setIsolationUri(guestNetwork.getBroadcastUri()); - gatewayNic.setMode(guestNetwork.getMode()); - - String gatewayCidr = guestNetwork.getCidr(); - gatewayNic.setNetmask(NetUtils.getCidrNetmask(gatewayCidr)); - networks.add(new Pair((NetworkVO) guestNetwork, gatewayNic)); - networks.add(new Pair(controlConfig, null)); - - router = new DomainRouterVO(id, _offering.getId(), VirtualMachineName.getRouterName(id, _instance), template.getId(), template.getHypervisorType(), template.getGuestOSId(), - owner.getDomainId(), owner.getId(), guestNetwork.getId(), _offering.getOfferHA()); - router = _itMgr.allocate(router, template, _offering, networks, plan, null, owner); - // Creating stats entry for router - UserStatisticsVO stats = _userStatsDao.findBy(owner.getId(), plan.getDataCenterId(), router.getNetworkId(), null, router.getId(), router.getType().toString()); - if (stats == null) { + + for (int i = 0; i < count; i++) { + DomainRouterVO router = null; + + List> networks = new ArrayList>(3); + networks.add(new Pair(publicNetworks.get(0), defaultNic)); + + long id = _routerDao.getNextInSequence(Long.class, "id"); if (s_logger.isDebugEnabled()) { - s_logger.debug("Creating user statistics for the account: " + owner.getId() + " Router Id: " + router.getId()); + s_logger.debug("Creating the router " + id); } - stats = new 
UserStatisticsVO(owner.getId(), plan.getDataCenterId(), null, router.getId(), router.getType().toString(), guestNetwork.getId()); - _userStatsDao.persist(stats); + NicProfile gatewayNic = new NicProfile(); + /* For redundant router */ + if (isRedundant) { + gatewayNic.setIp4Address(_networkMgr.acquireGuestIpAddress(guestNetwork, null)); + gatewayNic.setMacAddress(_networkMgr.getNextAvailableMacAddressInNetwork(guestNetwork.getId())); + } else { + gatewayNic.setIp4Address(guestNetwork.getGateway()); + } + gatewayNic.setBroadcastUri(guestNetwork.getBroadcastUri()); + gatewayNic.setBroadcastType(guestNetwork.getBroadcastDomainType()); + gatewayNic.setIsolationUri(guestNetwork.getBroadcastUri()); + gatewayNic.setMode(guestNetwork.getMode()); + + String gatewayCidr = guestNetwork.getCidr(); + gatewayNic.setNetmask(NetUtils.getCidrNetmask(gatewayCidr)); + networks.add(new Pair((NetworkVO) guestNetwork, gatewayNic)); + networks.add(new Pair(controlConfig, null)); + + if (routers.size() > 2) { + s_logger.error("Too much redundant router!"); + } + int priority = 0; + if (isRedundant) { + priority = 100 - routers.size() * 20; + } + router = new DomainRouterVO(id, _offering.getId(), VirtualMachineName.getRouterName(id, _instance), template.getId(), template.getHypervisorType(), template.getGuestOSId(), + owner.getDomainId(), owner.getId(), guestNetwork.getId(), isRedundant, priority, false, _offering.getOfferHA()); + router = _itMgr.allocate(router, template, _offering, networks, plan, null, owner); + // Creating stats entry for router + UserStatisticsVO stats = _userStatsDao.findBy(owner.getId(), plan.getDataCenterId(), router.getNetworkId(), null, router.getId(), router.getType().toString()); + if (stats == null) { + if (s_logger.isDebugEnabled()) { + s_logger.debug("Creating user statistics for the account: " + owner.getId() + " Router Id: " + router.getId()); + } + stats = new UserStatisticsVO(owner.getId(), plan.getDataCenterId(), null, router.getId(), 
router.getType().toString(), guestNetwork.getId()); + _userStatsDao.persist(stats); + } + routers.add(router); } txn.commit(); } finally { @@ -769,11 +857,11 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian _networkDao.releaseFromLockTable(network.getId()); } } - return router; + return routers; } @Override - public DomainRouterVO deployVirtualRouter(Network guestNetwork, DeployDestination dest, Account owner, Map params) throws InsufficientCapacityException, + public List deployVirtualRouter(Network guestNetwork, DeployDestination dest, Account owner, Map params) throws InsufficientCapacityException, ConcurrentOperationException, ResourceUnavailableException { long dcId = dest.getDataCenter().getId(); @@ -786,22 +874,21 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian assert guestNetwork.getTrafficType() == TrafficType.Guest; DataCenterDeployment plan = new DataCenterDeployment(dcId); + + List routers = findOrCreateVirtualRouter(guestNetwork, plan, dest.getCluster().getHypervisorType(), owner); - DomainRouterVO router = _routerDao.findByNetwork(guestNetwork.getId()); - if (router == null) { - router = createVirtualRouter(guestNetwork, plan, dest.getCluster().getHypervisorType(), owner); - } - - State state = router.getState(); - if (state != State.Running) { - router = start(router, _accountService.getSystemUser(), _accountService.getSystemAccount(), params); + for (DomainRouterVO router : routers) { + State state = router.getState(); + if (state != State.Running) { + router = start(router, _accountService.getSystemUser(), _accountService.getSystemAccount(), params); + } } - return router; + return routers; } @DB - protected DomainRouterVO findOrCreateDhcpServer(Network guestNetwork, DeployDestination dest, Account owner) throws InsufficientCapacityException, ConcurrentOperationException { + protected List findOrCreateDhcpServer(Network guestNetwork, DeployDestination dest, Account owner) throws 
InsufficientCapacityException, ConcurrentOperationException { DataCenterDeployment plan = null; long dcId = dest.getDataCenter().getId(); @@ -810,17 +897,18 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian DomainRouterVO router = null; Long podId = dest.getPod().getId(); + List routers = null; // In Basic zone and Guest network we have to start domR per pod, not per network if ((dc.getNetworkType() == NetworkType.Basic || guestNetwork.isSecurityGroupEnabled()) && guestNetwork.getTrafficType() == TrafficType.Guest) { - router = _routerDao.findByNetworkAndPod(guestNetwork.getId(), podId); + routers = _routerDao.findByNetworkAndPod(guestNetwork.getId(), podId); plan = new DataCenterDeployment(dcId, podId, null, null, null); } else { - router = _routerDao.findByNetwork(guestNetwork.getId()); + routers = _routerDao.findByNetwork(guestNetwork.getId()); plan = new DataCenterDeployment(dcId); } - if (router != null) { - return router; + if (routers != null && !routers.isEmpty()) { + return routers; } long id = _routerDao.getNextInSequence(Long.class, "id"); @@ -852,19 +940,19 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian // In Basic zone and Guest network we have to start domR per pod, not per network if ((dc.getNetworkType() == NetworkType.Basic || guestNetwork.isSecurityGroupEnabled()) && guestNetwork.getTrafficType() == TrafficType.Guest) { - router = _routerDao.findByNetworkAndPod(guestNetwork.getId(), podId); + routers = _routerDao.findByNetworkAndPod(guestNetwork.getId(), podId); plan = new DataCenterDeployment(dcId, podId, null, null, null); } else { - router = _routerDao.findByNetwork(guestNetwork.getId()); + routers = _routerDao.findByNetwork(guestNetwork.getId()); plan = new DataCenterDeployment(dcId); } - if (router != null) { - return router; + if (routers != null && !routers.isEmpty()) { + return routers; } router = new DomainRouterVO(id, _offering.getId(), 
VirtualMachineName.getRouterName(id, _instance), template.getId(), template.getHypervisorType(), template.getGuestOSId(), - owner.getDomainId(), owner.getId(), guestNetwork.getId(), _offering.getOfferHA()); + owner.getDomainId(), owner.getId(), guestNetwork.getId(), false, 0, false,_offering.getOfferHA()); router.setRole(Role.DHCP_USERDATA); router = _itMgr.allocate(router, template, _offering, networks, plan, null, owner); // Creating stats entry for router @@ -882,11 +970,13 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian _networkDao.releaseFromLockTable(network.getId()); } } - return router; + routers = new ArrayList(1); + routers.add(router); + return routers; } @Override - public DomainRouterVO deployDhcp(Network guestNetwork, DeployDestination dest, Account owner, Map params) throws InsufficientCapacityException, StorageUnavailableException, + public List deployDhcp(Network guestNetwork, DeployDestination dest, Account owner, Map params) throws InsufficientCapacityException, StorageUnavailableException, ConcurrentOperationException, ResourceUnavailableException { NetworkOffering offering = _networkOfferingDao.findByIdIncludingRemoved(guestNetwork.getNetworkOfferingId()); @@ -900,13 +990,14 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian assert guestNetwork.getState() == Network.State.Implemented || guestNetwork.getState() == Network.State.Setup || guestNetwork.getState() == Network.State.Implementing : "Network is not yet fully implemented: " + guestNetwork; - DomainRouterVO router = findOrCreateDhcpServer(guestNetwork, dest, owner); - State state = router.getState(); - if (state != State.Running) { - router = start(router, _accountService.getSystemUser(), _accountService.getSystemAccount(), params); + List routers = findOrCreateDhcpServer(guestNetwork, dest, owner); + for (DomainRouterVO router : routers) { + State state = router.getState(); + if (state != State.Running) { + router = 
start(router, _accountService.getSystemUser(), _accountService.getSystemAccount(), params); + } } - - return router; + return routers; } @Override @@ -936,6 +1027,12 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian StringBuilder buf = profile.getBootArgsBuilder(); buf.append(" template=domP type=" + type); buf.append(" name=").append(profile.getHostName()); + + boolean isRedundant = _configDao.getValue("network.redundantrouter").equals("true"); + if (isRedundant) { + buf.append(" redundant_router=1"); + } + NicProfile controlNic = null; String defaultDns1 = null; String defaultDns2 = null; @@ -988,6 +1085,13 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian } controlNic = nic; + } else if (nic.getTrafficType() == TrafficType.Guest && isRedundant) { + Network net = _networkMgr.getNetwork(nic.getNetworkId()); + buf.append(" guestgw=").append(net.getGateway()); + String brd = NetUtils.long2Ip(NetUtils.ip2Long(nic.getIp4Address()) | ~NetUtils.ip2Long(nic.getNetmask())); + buf.append(" guestbrd=").append(brd); + buf.append(" guestcidrsize=").append(NetUtils.getCidrSize(nic.getNetmask())); + buf.append(" router_pr=").append(router.getPriority()); } } @@ -1211,81 +1315,85 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian } @Override - public VirtualRouter addVirtualMachineIntoNetwork(Network network, NicProfile nic, VirtualMachineProfile profile, DeployDestination dest, ReservationContext context, Boolean startDhcp) + public List addVirtualMachineIntoNetwork(Network network, NicProfile nic, VirtualMachineProfile profile, DeployDestination dest, ReservationContext context, Boolean startDhcp) throws ConcurrentOperationException, InsufficientCapacityException, ResourceUnavailableException { - DomainRouterVO router = startDhcp ? 
deployDhcp(network, dest, profile.getOwner(), profile.getParameters()) : deployVirtualRouter(network, dest, profile.getOwner(), profile.getParameters()); + List routers = startDhcp ? deployDhcp(network, dest, profile.getOwner(), profile.getParameters()) : deployVirtualRouter(network, dest, profile.getOwner(), profile.getParameters()); + List rets = new ArrayList(routers.size()); - _userVmDao.loadDetails((UserVmVO) profile.getVirtualMachine()); + for (DomainRouterVO router : routers) { + _userVmDao.loadDetails((UserVmVO) profile.getVirtualMachine()); - String password = (String) profile.getParameter(VirtualMachineProfile.Param.VmPassword); - String userData = profile.getVirtualMachine().getUserData(); - String sshPublicKey = profile.getVirtualMachine().getDetail("SSH.PublicKey"); - Commands cmds = new Commands(OnError.Stop); + String password = (String) profile.getParameter(VirtualMachineProfile.Param.VmPassword); + String userData = profile.getVirtualMachine().getUserData(); + String sshPublicKey = profile.getVirtualMachine().getDetail("SSH.PublicKey"); + Commands cmds = new Commands(OnError.Stop); - String routerControlIpAddress = null; - List nics = _nicDao.listByVmId(router.getId()); - for (NicVO n : nics) { - NetworkVO nc = _networkDao.findById(n.getNetworkId()); - if (nc.getTrafficType() == TrafficType.Control) { - routerControlIpAddress = n.getIp4Address(); + String routerControlIpAddress = null; + List nics = _nicDao.listByVmId(router.getId()); + for (NicVO n : nics) { + NetworkVO nc = _networkDao.findById(n.getNetworkId()); + if (nc.getTrafficType() == TrafficType.Control) { + routerControlIpAddress = n.getIp4Address(); + } } - } - DhcpEntryCommand dhcpCommand = new DhcpEntryCommand(nic.getMacAddress(), nic.getIp4Address(), profile.getVirtualMachine().getHostName()); - dhcpCommand.setAccessDetail(NetworkElementCommand.ROUTER_IP, routerControlIpAddress); - dhcpCommand.setAccessDetail(NetworkElementCommand.ROUTER_GUEST_IP, router.getGuestIpAddress()); - 
dhcpCommand.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName()); - - DataCenterVO dcVo = _dcDao.findById(router.getDataCenterIdToDeployIn()); - dhcpCommand.setAccessDetail(NetworkElementCommand.ZONE_NETWORK_TYPE, dcVo.getNetworkType().toString()); - - cmds.addCommand("dhcp", dhcpCommand); + DhcpEntryCommand dhcpCommand = new DhcpEntryCommand(nic.getMacAddress(), nic.getIp4Address(), profile.getVirtualMachine().getHostName()); + dhcpCommand.setAccessDetail(NetworkElementCommand.ROUTER_IP, routerControlIpAddress); + dhcpCommand.setAccessDetail(NetworkElementCommand.ROUTER_GUEST_IP, router.getGuestIpAddress()); + dhcpCommand.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName()); - // password should be set only on default network element - if (password != null && network.isDefault()) { - final String encodedPassword = PasswordGenerator.rot13(password); - SavePasswordCommand cmd = new SavePasswordCommand(encodedPassword, nic.getIp4Address(), profile.getVirtualMachine().getHostName()); - cmd.setAccessDetail(NetworkElementCommand.ROUTER_IP, router.getPrivateIpAddress()); - cmd.setAccessDetail(NetworkElementCommand.ROUTER_GUEST_IP, router.getGuestIpAddress()); - cmd.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName()); - cmd.setAccessDetail(NetworkElementCommand.ZONE_NETWORK_TYPE, dcVo.getNetworkType().toString()); - - cmds.addCommand("password", cmd); - } + DataCenterVO dcVo = _dcDao.findById(router.getDataCenterIdToDeployIn()); + dhcpCommand.setAccessDetail(NetworkElementCommand.ZONE_NETWORK_TYPE, dcVo.getNetworkType().toString()); - String serviceOffering = _serviceOfferingDao.findByIdIncludingRemoved(profile.getServiceOfferingId()).getDisplayText(); - String zoneName = _dcDao.findById(network.getDataCenterId()).getName(); + cmds.addCommand("dhcp", dhcpCommand); - cmds.addCommand( - "vmdata", - generateVmDataCommand(router, nic.getIp4Address(), userData, serviceOffering, zoneName, 
nic.getIp4Address(), profile.getVirtualMachine().getHostName(), profile.getVirtualMachine() + // password should be set only on default network element + if (password != null && network.isDefault()) { + final String encodedPassword = PasswordGenerator.rot13(password); + SavePasswordCommand cmd = new SavePasswordCommand(encodedPassword, nic.getIp4Address(), profile.getVirtualMachine().getHostName()); + cmd.setAccessDetail(NetworkElementCommand.ROUTER_IP, router.getPrivateIpAddress()); + cmd.setAccessDetail(NetworkElementCommand.ROUTER_GUEST_IP, router.getGuestIpAddress()); + cmd.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName()); + cmd.setAccessDetail(NetworkElementCommand.ZONE_NETWORK_TYPE, dcVo.getNetworkType().toString()); + + cmds.addCommand("password", cmd); + } + + String serviceOffering = _serviceOfferingDao.findByIdIncludingRemoved(profile.getServiceOfferingId()).getDisplayText(); + String zoneName = _dcDao.findById(network.getDataCenterId()).getName(); + + cmds.addCommand( + "vmdata", + generateVmDataCommand(router, nic.getIp4Address(), userData, serviceOffering, zoneName, nic.getIp4Address(), profile.getVirtualMachine().getHostName(), profile.getVirtualMachine() .getInstanceName(), profile.getId(), sshPublicKey)); - try { - _agentMgr.send(router.getHostId(), cmds); - } catch (OperationTimedoutException e) { - throw new AgentUnavailableException("Unable to reach the agent ", router.getHostId(), e); - } + try { + _agentMgr.send(router.getHostId(), cmds); + } catch (OperationTimedoutException e) { + throw new AgentUnavailableException("Unable to reach the agent ", router.getHostId(), e); + } - Answer answer = cmds.getAnswer("dhcp"); - if (!answer.getResult()) { - s_logger.error("Unable to set dhcp entry for " + profile + " on domR: " + router.getHostName() + " due to " + answer.getDetails()); - throw new ResourceUnavailableException("Unable to set dhcp entry for " + profile + " due to " + answer.getDetails(), DataCenter.class, 
router.getDataCenterIdToDeployIn()); - } + Answer answer = cmds.getAnswer("dhcp"); + if (!answer.getResult()) { + s_logger.error("Unable to set dhcp entry for " + profile + " on domR: " + router.getHostName() + " due to " + answer.getDetails()); + throw new ResourceUnavailableException("Unable to set dhcp entry for " + profile + " due to " + answer.getDetails(), DataCenter.class, router.getDataCenterIdToDeployIn()); + } - answer = cmds.getAnswer("password"); - if (answer != null && !answer.getResult()) { - s_logger.error("Unable to set password for " + profile + " due to " + answer.getDetails()); - throw new ResourceUnavailableException("Unable to set password due to " + answer.getDetails(), DataCenter.class, router.getDataCenterIdToDeployIn()); - } + answer = cmds.getAnswer("password"); + if (answer != null && !answer.getResult()) { + s_logger.error("Unable to set password for " + profile + " due to " + answer.getDetails()); + throw new ResourceUnavailableException("Unable to set password due to " + answer.getDetails(), DataCenter.class, router.getDataCenterIdToDeployIn()); + } - answer = cmds.getAnswer("vmdata"); - if (answer != null && !answer.getResult()) { - s_logger.error("Unable to set VM data for " + profile + " due to " + answer.getDetails()); - throw new ResourceUnavailableException("Unable to set VM data due to " + answer.getDetails(), DataCenter.class, router.getDataCenterIdToDeployIn()); + answer = cmds.getAnswer("vmdata"); + if (answer != null && !answer.getResult()) { + s_logger.error("Unable to set VM data for " + profile + " due to " + answer.getDetails()); + throw new ResourceUnavailableException("Unable to set VM data due to " + answer.getDetails(), DataCenter.class, router.getDataCenterIdToDeployIn()); + } + rets.add(router); } - return router; + return rets; } @Override @@ -1544,66 +1652,67 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian @Override public boolean associateIP(Network network, List ipAddress) 
throws ResourceUnavailableException { - DomainRouterVO router = _routerDao.findByNetwork(network.getId()); - if (router == null) { + List routers = _routerDao.findByNetwork(network.getId()); + if (routers == null || routers.isEmpty()) { s_logger.warn("Unable to associate ip addresses, virtual router doesn't exist in the network " + network.getId()); throw new ResourceUnavailableException("Unable to assign ip addresses", DataCenter.class, network.getDataCenterId()); } - if (router.getState() == State.Running) { - Commands cmds = new Commands(OnError.Continue); - // Have to resend all already associated ip addresses - createAssociateIPCommands(router, ipAddress, cmds, 0); + boolean result = true; + for (DomainRouterVO router : routers) { + if (router.getState() == State.Running) { + Commands cmds = new Commands(OnError.Continue); + // Have to resend all already associated ip addresses + createAssociateIPCommands(router, ipAddress, cmds, 0); - return sendCommandsToRouter(router, cmds); - } else if (router.getState() == State.Stopped) { - return true; - } else { - s_logger.warn("Unable to associate ip addresses, virtual router is not in the right state " + router.getState()); - throw new ResourceUnavailableException("Unable to assign ip addresses, domR is not in right state " + router.getState(), DataCenter.class, network.getDataCenterId()); + result = result && sendCommandsToRouter(router, cmds); + } else if (router.getState() != State.Stopped) { + s_logger.warn("Unable to associate ip addresses, virtual router is not in the right state " + router.getState()); + throw new ResourceUnavailableException("Unable to assign ip addresses, domR is not in right state " + router.getState(), DataCenter.class, network.getDataCenterId()); + } } + return result; } @Override public boolean applyFirewallRules(Network network, List rules) throws ResourceUnavailableException { - DomainRouterVO router = _routerDao.findByNetwork(network.getId()); - if (router == null) { + List routers = 
_routerDao.findByNetwork(network.getId()); + if (routers == null || routers.isEmpty()) { s_logger.warn("Unable to apply lb rules, virtual router doesn't exist in the network " + network.getId()); throw new ResourceUnavailableException("Unable to apply lb rules", DataCenter.class, network.getDataCenterId()); } - if (router.getState() == State.Running) { - if (rules != null && !rules.isEmpty()) { - if (rules.get(0).getPurpose() == Purpose.LoadBalancing) { - // for load balancer we have to resend all lb rules for the network - List lbs = _loadBalancerDao.listByNetworkId(network.getId()); - List lbRules = new ArrayList(); - for (LoadBalancerVO lb : lbs) { - List dstList = _lbMgr.getExistingDestinations(lb.getId()); - LoadBalancingRule loadBalancing = new LoadBalancingRule(lb, dstList); - lbRules.add(loadBalancing); + boolean result = true; + for (DomainRouterVO router : routers) { + if (router.getState() == State.Running) { + if (rules != null && !rules.isEmpty()) { + if (rules.get(0).getPurpose() == Purpose.LoadBalancing) { + // for load balancer we have to resend all lb rules for the network + List lbs = _loadBalancerDao.listByNetworkId(network.getId()); + List lbRules = new ArrayList(); + for (LoadBalancerVO lb : lbs) { + List dstList = _lbMgr.getExistingDestinations(lb.getId()); + LoadBalancingRule loadBalancing = new LoadBalancingRule(lb, dstList); + lbRules.add(loadBalancing); + } + result = result && applyLBRules(router, lbRules); + } else if (rules.get(0).getPurpose() == Purpose.PortForwarding) { + result = result && applyPortForwardingRules(router, (List) rules); + } else if (rules.get(0).getPurpose() == Purpose.StaticNat) { + result = result && applyStaticNatRules(router, (List) rules); + } else { + s_logger.warn("Unable to apply rules of purpose: " + rules.get(0).getPurpose()); + result = false; } - - return applyLBRules(router, lbRules); - } else if (rules.get(0).getPurpose() == Purpose.PortForwarding) { - return applyPortForwardingRules(router, (List) 
rules); - } else if (rules.get(0).getPurpose() == Purpose.StaticNat) { - return applyStaticNatRules(router, (List) rules); - - } else { - s_logger.warn("Unable to apply rules of purpose: " + rules.get(0).getPurpose()); - return false; - } + } + } else if (router.getState() == State.Stopped || router.getState() == State.Stopping) { + s_logger.debug("Router is in " + router.getState() + ", so not sending apply firewall rules commands to the backend"); } else { - return true; + s_logger.warn("Unable to apply firewall rules, virtual router is not in the right state " + router.getState()); + throw new ResourceUnavailableException("Unable to apply firewall rules, virtual router is not in the right state", VirtualRouter.class, router.getId()); } - } else if (router.getState() == State.Stopped || router.getState() == State.Stopping) { - s_logger.debug("Router is in " + router.getState() + ", so not sending apply firewall rules commands to the backend"); - return true; - } else { - s_logger.warn("Unable to apply firewall rules, virtual router is not in the right state " + router.getState()); - throw new ResourceUnavailableException("Unable to apply firewall rules, virtual router is not in the right state", VirtualRouter.class, router.getId()); } + return result; } protected boolean applyLBRules(DomainRouterVO router, List rules) throws ResourceUnavailableException { @@ -1628,8 +1737,12 @@ public class VirtualNetworkApplianceManagerImpl implements VirtualNetworkApplian } @Override - public VirtualRouter getRouterForNetwork(long networkId) { - return _routerDao.findByNetwork(networkId); - + public List getRoutersForNetwork(long networkId) { + List routers = _routerDao.findByNetwork(networkId); + List vrs = new ArrayList(routers.size()); + for (DomainRouterVO router : routers) { + vrs.add(router); + } + return vrs; } } diff --git a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java index 8362b0969a4..939d7dbb6b1 
100755 --- a/server/src/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/server/src/com/cloud/vm/VirtualMachineManagerImpl.java @@ -613,6 +613,15 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene Journal journal = start.second().getJournal(); ExcludeList avoids = new ExcludeList(); + if (vm.getType().equals(VirtualMachine.Type.DomainRouter)) { + List routers = _routerDao.findBy(vm.getAccountId(), vm.getDataCenterIdToDeployIn()); + for (DomainRouterVO router : routers) { + if (router.hostId != null) { + avoids.addHost(router.hostId); + s_logger.info("Router: try to avoid host " + router.hostId); + } + } + } int retry = _retry; while (retry-- != 0) { // It's != so that it can match -1. // edit plan if this vm's ROOT volume is in READY state already diff --git a/server/src/com/cloud/vm/dao/DomainRouterDao.java b/server/src/com/cloud/vm/dao/DomainRouterDao.java index 93cf692bd96..2ff225d88ee 100755 --- a/server/src/com/cloud/vm/dao/DomainRouterDao.java +++ b/server/src/com/cloud/vm/dao/DomainRouterDao.java @@ -43,7 +43,7 @@ public interface DomainRouterDao extends GenericDao { * @Param dcId data center Id. * @return DomainRouterVO */ - public DomainRouterVO findBy(long accountId, long dcId); + public List findBy(long accountId, long dcId); /** * gets the DomainRouterVO by user id. 
@@ -78,11 +78,11 @@ public interface DomainRouterDao extends GenericDao { */ public List listByDomain(Long id); - DomainRouterVO findBy(long accountId, long dcId, Role role); + List findBy(long accountId, long dcId, Role role); - DomainRouterVO findByNetwork(long networkId); + List findByNetwork(long networkId); - DomainRouterVO findByNetworkAndPod(long networkId, long podId); + List findByNetworkAndPod(long networkId, long podId); List listActive(long networkId); diff --git a/server/src/com/cloud/vm/dao/DomainRouterDaoImpl.java b/server/src/com/cloud/vm/dao/DomainRouterDaoImpl.java index 353e2c2479f..da7ac811bf3 100755 --- a/server/src/com/cloud/vm/dao/DomainRouterDaoImpl.java +++ b/server/src/com/cloud/vm/dao/DomainRouterDaoImpl.java @@ -107,21 +107,21 @@ public class DomainRouterDaoImpl extends GenericDaoBase im } @Override - public DomainRouterVO findBy(long accountId, long dcId) { + public List findBy(long accountId, long dcId) { SearchCriteria sc = AllFieldsSearch.create(); sc.setParameters("account", accountId); sc.setParameters("dc", dcId); sc.setParameters("role", Role.DHCP_FIREWALL_LB_PASSWD_USERDATA); - return findOneBy(sc); + return listBy(sc); } @Override - public DomainRouterVO findBy(long accountId, long dcId, Role role) { + public List findBy(long accountId, long dcId, Role role) { SearchCriteria sc = AllFieldsSearch.create(); sc.setParameters("account", accountId); sc.setParameters("dc", dcId); sc.setParameters("role", role); - return findOneBy(sc); + return listBy(sc); } @Override @@ -157,10 +157,10 @@ public class DomainRouterDaoImpl extends GenericDaoBase im } @Override - public DomainRouterVO findByNetwork(long networkId) { + public List findByNetwork(long networkId) { SearchCriteria sc = AllFieldsSearch.create(); sc.setParameters("network", networkId); - return findOneBy(sc); + return listBy(sc); } @Override @@ -172,11 +172,11 @@ public class DomainRouterDaoImpl extends GenericDaoBase im } @Override - public DomainRouterVO 
findByNetworkAndPod(long networkId, long podId) { + public List findByNetworkAndPod(long networkId, long podId) { SearchCriteria sc = AllFieldsSearch.create(); sc.setParameters("network", networkId); sc.setParameters("podId", podId); - return findOneBy(sc); + return listBy(sc); } @Override diff --git a/setup/db/create-schema.sql b/setup/db/create-schema.sql index 3d49570ec02..ed7ac75a101 100755 --- a/setup/db/create-schema.sql +++ b/setup/db/create-schema.sql @@ -894,6 +894,9 @@ CREATE TABLE `cloud`.`domain_router` ( `guest_netmask` varchar(15) COMMENT 'netmask used for the guest network', `guest_ip_address` char(40) COMMENT ' ip address in the guest network', `network_id` bigint unsigned NOT NULL COMMENT 'network configuration that this domain router belongs to', + `is_redundant_router` int(1) unsigned NOT NULL COMMENT 'if in redundant router mode', + `priority` int(4) unsigned COMMENT 'priority of router in the redundant router mode', + `is_master` int(1) unsigned DEFAULT 0 COMMENT 'if the router is master in redundant router mode', `role` varchar(64) NOT NULL COMMENT 'type of role played by this router', PRIMARY KEY (`id`), CONSTRAINT `fk_domain_router__id` FOREIGN KEY `fk_domain_router__id` (`id`) REFERENCES `vm_instance`(`id`) ON DELETE CASCADE