Merge pull request #1070 from ekholabs/fix/rVPC-CLOUDSTACK-9015

[4.6.1] CLOUDSTACK-9015 - Redundant VPC Virtual Router's state is BACKUP & BACKUP or MASTER & MASTER

This PR closes #1064.

All the details can be found in the original PR, which won't be merged because it was created against master. Once this PR is closed, the original one will also be closed.

* pr/1070:
  CLOUDSTACK-9015 - Delete public IP in order to get both IP and NAT rule removed.
  CLOUDSTACK-9015 - Add test to cover the rVPC routers stop/start/reboot scenario
  CLOUDSTACK-9015 - Make sure the Backup router can talk to the Master router after a stop/start/reboot

Signed-off-by: Remi Bergsma <github@remi.nl>
This commit is contained in:
Remi Bergsma 2015-11-19 17:52:30 +01:00
commit 78e5518741
6 changed files with 85 additions and 36 deletions

View File

@ -906,9 +906,6 @@ def main(argv):
fwd = CsForwardingRules("forwardingrules", config)
fwd.process()
red = CsRedundant(config)
red.set()
logging.debug("Configuring s2s vpn")
vpns = CsSite2SiteVpn("site2sitevpn", config)
vpns.process()
@ -938,6 +935,9 @@ def main(argv):
logging.debug("Configuring iptables rules .....")
nf = CsNetfilters()
nf.compare(config.get_fw())
red = CsRedundant(config)
red.set()
logging.debug("Configuring iptables rules done ...saving rules")

View File

@ -228,10 +228,10 @@ class CsDevice:
continue
self.devlist.append(vals[0])
def waitfordevice(self):
def waitfordevice(self, timeout=15):
""" Wait up to 15 seconds for a device to become available """
count = 0
while count < 15:
while count < timeout:
if self.dev in self.devlist:
return True
time.sleep(1)
@ -498,6 +498,9 @@ class CsIP:
self.fw.append(["", "", "-A NETWORK_STATS ! -i eth0 -o eth2 -p tcp"])
self.fw.append(["", "", "-A NETWORK_STATS -i eth2 ! -o eth0 -p tcp"])
self.fw.append(["filter", "", "-A INPUT -d 224.0.0.18/32 -j ACCEPT"])
self.fw.append(["filter", "", "-A INPUT -d 225.0.0.50/32 -j ACCEPT"])
self.fw.append(["filter", "", "-A INPUT -p icmp -j ACCEPT"])
self.fw.append(["filter", "", "-A INPUT -i eth0 -p tcp -m tcp --dport 3922 -m state --state NEW,ESTABLISHED -j ACCEPT"])

View File

@ -86,6 +86,29 @@ class CsRedundant(object):
self._redundant_off()
return
interfaces = [interface for interface in self.address.get_ips() if interface.is_guest()]
isDeviceReady = False
dev = ''
for interface in interfaces:
if dev == interface.get_device():
continue
dev = interface.get_device()
logging.info("Wait for devices to be configured so we can start keepalived")
devConfigured = CsDevice(dev, self.config).waitfordevice()
if devConfigured:
command = "ip link show %s | grep 'state UP'" % dev
devUp = CsHelper.execute(command)
if devUp:
logging.info("Device %s is present, let's start keepalive now." % dev)
isDeviceReady = True
if not isDeviceReady:
logging.info("Guest network not configured yet, let's stop router redundancy for now.")
CsHelper.service("conntrackd", "stop")
CsHelper.service("keepalived", "stop")
return
CsHelper.mkdir(self.CS_RAMDISK_DIR, 0755, False)
CsHelper.mount_tmpfs(self.CS_RAMDISK_DIR)
CsHelper.mkdir(self.CS_ROUTER_DIR, 0755, False)
@ -129,17 +152,16 @@ class CsRedundant(object):
CsHelper.copy(conntrackd_template_conf, conntrackd_temp_bkp)
conntrackd_tmpl = CsFile(conntrackd_template_conf)
if guest is not None:
conntrackd_tmpl.section("Multicast {", "}", [
"IPv4_address 225.0.0.50\n",
"Group 3780\n",
"IPv4_interface %s\n" % guest.get_ip(),
"Interface %s\n" % guest.get_device(),
"SndSocketBuffer 1249280\n",
"RcvSocketBuffer 1249280\n",
"Checksum on\n"])
conntrackd_tmpl.section("Address Ignore {", "}", self._collect_ignore_ips())
conntrackd_tmpl.commit()
conntrackd_tmpl.section("Multicast {", "}", [
"IPv4_address 225.0.0.50\n",
"Group 3780\n",
"IPv4_interface %s\n" % guest.get_ip(),
"Interface %s\n" % guest.get_device(),
"SndSocketBuffer 1249280\n",
"RcvSocketBuffer 1249280\n",
"Checksum on\n"])
conntrackd_tmpl.section("Address Ignore {", "}", self._collect_ignore_ips())
conntrackd_tmpl.commit()
conntrackd_conf = CsFile(self.CONNTRACKD_CONF)
@ -164,22 +186,6 @@ class CsRedundant(object):
"* * * * * root sleep 30; $SHELL %s/check_heartbeat.sh 2>&1 > /dev/null" % self.CS_ROUTER_DIR, -1)
heartbeat_cron.commit()
# Configure KeepaliveD cron job - runs at every reboot
keepalived_cron = CsFile("/etc/cron.d/keepalived")
keepalived_cron.add("SHELL=/bin/bash", 0)
keepalived_cron.add(
"PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin", 1)
keepalived_cron.add("@reboot root service keepalived start", -1)
keepalived_cron.commit()
# Configure ConntrackD cron job - runs at every reboot
conntrackd_cron = CsFile("/etc/cron.d/conntrackd")
conntrackd_cron.add("SHELL=/bin/bash", 0)
conntrackd_cron.add(
"PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin", 1)
conntrackd_cron.add("@reboot root service conntrackd start", -1)
conntrackd_cron.commit()
proc = CsProcess(['/usr/sbin/keepalived'])
if not proc.find() or keepalived_conf.is_changed():
keepalived_conf.commit()

View File

@ -42,7 +42,7 @@ logging.basicConfig(filename=config.get_logger(),
format=config.get_format())
config.cmdline()
cl = CsCmdLine("cmdline", config)
#Update the configuration to set state as backup and let keepalived decide who is the real Master
#Update the configuration to set state as backup and let keepalived decide who the real Master is!
cl.set_master_state(False)
cl.save()

View File

@ -26,7 +26,7 @@ vrrp_script heartbeat {
vrrp_instance inside_network {
state EQUAL
interface eth0
interface eth2
virtual_router_id 51
nopreempt
@ -37,7 +37,7 @@ vrrp_instance inside_network {
}
virtual_ipaddress {
[ROUTER_IP] brd [BOARDCAST] dev eth0
[ROUTER_IP] brd [BOARDCAST] dev eth2
}
track_script {

View File

@ -23,6 +23,7 @@ from marvin.cloudstackTestCase import cloudstackTestCase
from marvin.lib.base import (stopRouter,
startRouter,
destroyRouter,
rebootRouter,
Account,
VpcOffering,
VPC,
@ -356,6 +357,12 @@ class TestVPCRedundancy(cloudstackTestCase):
cmd.id = router.id
self.apiclient.stopRouter(cmd)
def reboot_router(self, router):
    """Issue a rebootRouter API call for the given router.

    Builds a ``rebootRouterCmd`` carrying ``router.id`` and submits it
    through ``self.apiclient.rebootRouter``.
    """
    self.logger.debug('Rebooting router %s' % router.id)
    reboot_request = rebootRouter.rebootRouterCmd()
    reboot_request.id = router.id
    # The API client performs the actual call; nothing is returned here.
    self.apiclient.rebootRouter(reboot_request)
def stop_router_by_type(self, type):
self.check_master_status(2)
self.logger.debug('Stopping %s router' % type)
@ -363,6 +370,13 @@ class TestVPCRedundancy(cloudstackTestCase):
if router.redundantstate == type:
self.stop_router(router)
def reboot_router_by_type(self, type):
    """Reboot every router whose ``redundantstate`` equals ``type``.

    ``self.check_master_status(2)`` is called first, then each matching
    entry of ``self.routers`` is passed to ``self.reboot_router`` in order.
    """
    self.check_master_status(2)
    self.logger.debug('Rebooting %s router' % type)
    # Lazy filter: each router's state is inspected right before it is
    # rebooted, exactly as in a plain loop with an inner ``if``.
    matching = (candidate for candidate in self.routers
                if candidate.redundantstate == type)
    for candidate in matching:
        self.reboot_router(candidate)
def destroy_routers(self):
self.logger.debug('Destroying routers')
for router in self.routers:
@ -521,6 +535,7 @@ class TestVPCRedundancy(cloudstackTestCase):
self.delete_nat_rules()
self.check_master_status(1)
self.do_vpc_test(True)
self.delete_public_ip()
self.start_routers()
self.add_nat_rules()
@ -537,6 +552,24 @@ class TestVPCRedundancy(cloudstackTestCase):
self.check_master_status(2)
self.add_nat_rules()
self.do_default_routes_test()
@attr(tags=["advanced", "intervlan"], required_hardware="true")
def test_03_create_redundant_VPC_1tier_2VMs_2IPs_2PF_ACL_reboot_routers(self):
    """ Create a redundant VPC with one network and reboot the MASTER router twice.

    After the network (gateway 10.1.1.1) is created and NAT rules are added,
    the VPC test is run with expectFail=False. The router currently reported
    as MASTER is then rebooted two times; after each reboot the master status
    check and the VPC test are repeated.
    """
    # Log the name of THIS test (the message previously named test_01).
    self.logger.debug("Starting test_03_create_redundant_VPC_1tier_2VMs_2IPs_2PF_ACL_reboot_routers")
    self.query_routers()
    self.networks.append(self.create_network(self.services["network_offering"], "10.1.1.1"))
    self.check_master_status(2)
    self.add_nat_rules()
    self.do_vpc_test(False)

    # Two consecutive reboots of whichever router is MASTER at that moment.
    # NOTE(review): presumably the second reboot hits the router that took
    # over after the first one - confirm against keepalived behaviour.
    for _ in range(2):
        self.reboot_router_by_type("MASTER")
        self.check_master_status(2)
        self.do_vpc_test(False)
def delete_nat_rules(self):
for o in self.networks:
@ -545,6 +578,14 @@ class TestVPCRedundancy(cloudstackTestCase):
vm.get_nat().delete(self.apiclient)
vm.set_nat(None)
def delete_public_ip(self):
    """Release the public IP of every VM in every tracked network.

    For each VM that currently holds an IP, the IP object is deleted through
    ``self.apiclient`` and both the VM's IP and NAT references are reset to
    ``None``. VMs without an IP are left untouched.
    """
    for network in self.networks:
        for guest_vm in network.get_vms():
            if vm_has_no_ip(guest_vm):
                continue
            guest_vm.get_ip().delete(self.apiclient)
            # Clear both references: once the IP is gone the stored NAT
            # rule reference is cleared along with it.
            guest_vm.set_ip(None)
            guest_vm.set_nat(None)


def vm_has_no_ip(guest_vm):
    """Return True when the VM wrapper reports no public IP."""
    return guest_vm.get_ip() is None
def add_nat_rules(self):
for o in self.networks:
for vm in o.get_vms():
@ -552,7 +593,6 @@ class TestVPCRedundancy(cloudstackTestCase):
vm.set_ip(self.acquire_publicip(o.get_net()))
if vm.get_nat() is None:
vm.set_nat(self.create_natrule(vm.get_vm(), vm.get_ip(), o.get_net()))
time.sleep(5)
def do_vpc_test(self, expectFail):
retries = 5