From 7ccc833114bae575962b69b212edb080b99c412d Mon Sep 17 00:00:00 2001 From: Sheng Yang Date: Fri, 19 Aug 2011 15:34:02 -0700 Subject: [PATCH] Fix potential redundant router FAULT state caused by dnsmasq The following message may appear during redundant router startup: FAULT (Restarting DNS forwarder and DHCP server: dnsmasq failed!) This is caused by edithosts.sh racing with the keepalived process: both want to restart dnsmasq. The bug is very hard to reproduce, even under normal conditions. Adding a file lock to edithosts.sh should solve it. --- .../systemvm/debian/config/root/edithosts.sh | 35 ++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/patches/systemvm/debian/config/root/edithosts.sh b/patches/systemvm/debian/config/root/edithosts.sh index 478ce4ba552..506c5e68b48 100755 --- a/patches/systemvm/debian/config/root/edithosts.sh +++ b/patches/systemvm/debian/config/root/edithosts.sh @@ -67,6 +67,33 @@ sed -i /"$2 "/d /etc/hosts sed -i /"$3"/d /etc/hosts echo "$2 $3" >> /etc/hosts +locked=0 +if [ $no_redundant -eq 0 ] +then +#for redundant router, grap the lock to prevent racy with keepalived process +LOCK=/tmp/rrouter.lock + +# Wait the lock +for i in `seq 1 5` +do + if [ ! -e $LOCK ] + then + touch $LOCK + locked=1 + break + fi + sleep 1 + logger -t cloud "edithosts: sleep 1 second wait for the redundant router lock" +done + +if [ $locked -eq 0 ] +then + logger -t cloud "edithosts: fail to get the redundant router lock" + logger -t cloud "edithosts: keepalived should able to handle the dnsmasq restart" + exit +fi +fi + # make dnsmasq re-read files pid=$(pidof dnsmasq) if [ "$pid" != "" ] @@ -81,4 +108,10 @@ else fi fi -exit $? +ret=$? +if [ $locked -eq 1 ] +then + rm $LOCK +fi + +exit $ret