Fix cloud-sysvmadmin hang (#12355)

This commit is contained in:
Abhisar Sinha 2026-01-13 13:36:25 +05:30 committed by GitHub
parent 8627c60b95
commit 9e86fdf1b6
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
1 changed file with 33 additions and 96 deletions

View File

@@ -184,49 +184,29 @@ stop_start_system() {
stop_start_router() { stop_start_router() {
prepare_ids_clause prepare_ids_clause
router=(`mysql -h $db --user=$user --password=$password --skip-column-names -U cloud -e "select uuid from vm_instance where state=\"Running\" and type=\"DomainRouter\"$zone$vmidsclause"`) router=(`mysql -h $db --user=$user --password=$password --skip-column-names -U cloud -e "select uuid from vm_instance where state=\"Running\" and type=\"DomainRouter\"$zone$vmidsclause"`)
length_router=(${#router[@]}) length_router=${#router[@]}
echo -e "\nStopping and starting $length_router running routing vm(s)$inzone$withids... " echo -e "\nStopping and starting $length_router running routing vm(s)$inzone$withids... "
echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Stopping and starting $length_router running routing vm(s)$inzone$withids... " >>$LOGFILE echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Stopping and starting $length_router running routing vm(s)$inzone$withids... " >>$LOGFILE
#Spawn reboot router in parallel - run commands in <n> chunks - number of threads is configurable #Spawn reboot router in parallel - run commands in <n> chunks - number of threads is configurable
if [ $maxthreads -gt $length_router ]; then
maxthreads=$length_router
fi
pids=() pids=()
for d in "${router[@]}"; do for d in "${router[@]}"; do
reboot_router $d & reboot_router $d &
pids=( "${pids[@]}" $! ) pids=( "${pids[@]}" $! )
length_pids=${#pids[@]}
length_pids=(${#pids[@]})
unfinishedPids=(${#pids[@]})
if [ $maxthreads -gt $length_router ]; then
maxthreads=$length_router
fi
if [ $length_pids -ge $maxthreads ]; then if [ $length_pids -ge $maxthreads ]; then
while [ $unfinishedPids -gt 0 ]; do # Wait for $maxthreads number of processes to finish
sleep 10 wait
count=0 # Clear the pids array for the next batch
for (( i = 0 ; i < $length_pids; i++ )); do
if ! ps ax | grep -v grep | grep ${pids[$i]} > /dev/null; then
count=`expr $count + 1`
fi
done
if [ $count -eq $unfinishedPids ]; then
unfinishedPids=0
fi
done
#remove all elements from pids
if [ $unfinishedPids -eq 0 ]; then
pids=() pids=()
length_pids=(${#pids[@]})
fi
fi fi
done done
@@ -234,9 +214,8 @@ stop_start_router() {
if [ "$length_router" == "0" ];then if [ "$length_router" == "0" ];then
echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] No running router vms found \n" >>$LOGFILE echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] No running router vms found \n" >>$LOGFILE
else else
while [ $unfinishedPids -gt 0 ]; do # Wait for the remaining background processes to finish
sleep 10 wait
done
echo -e "Done restarting router(s)$inzone$withids. \n" echo -e "Done restarting router(s)$inzone$withids. \n"
echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Done restarting router(s)$inzone$withids. \n" >>$LOGFILE echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Done restarting router(s)$inzone$withids. \n" >>$LOGFILE
@@ -288,49 +267,29 @@ reboot_router(){
restart_networks(){ restart_networks(){
networks=(`mysql -h $db --user=$user --password=$password --skip-column-names -U cloud -e "select n.id networks=(`mysql -h $db --user=$user --password=$password --skip-column-names -U cloud -e "select n.id
from networks n, network_offerings no where n.network_offering_id = no.id and no.system_only = 0 and n.removed is null$zone"`) from networks n, network_offerings no where n.network_offering_id = no.id and no.system_only = 0 and n.removed is null$zone"`)
length_networks=(${#networks[@]}) length_networks=${#networks[@]}
echo -e "\nRestarting $length_networks networks$inzone... " echo -e "\nRestarting $length_networks networks$inzone... "
echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Restarting $length_networks networks$inzone... " >>$LOGFILE echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Restarting $length_networks networks$inzone... " >>$LOGFILE
#Spawn restart network in parallel - run commands in <n> chunks - number of threads is configurable #Spawn restart network in parallel - run commands in <n> chunks - number of threads is configurable
if [ $maxthreads -gt $length_networks ]; then
maxthreads=$length_networks
fi
pids=() pids=()
for d in "${networks[@]}"; do for d in "${networks[@]}"; do
restart_network $d & restart_network $d &
pids=( "${pids[@]}" $! ) pids=( "${pids[@]}" $! )
length_pids=${#pids[@]}
length_pids=(${#pids[@]})
unfinishedPids=(${#pids[@]})
if [ $maxthreads -gt $length_networks ]; then
maxthreads=$length_networks
fi
if [ $length_pids -ge $maxthreads ]; then if [ $length_pids -ge $maxthreads ]; then
while [ $unfinishedPids -gt 0 ]; do # Wait for $maxthreads number of processes to finish
sleep 10 wait
count=0 # Clear the pids array for the next batch
for (( i = 0 ; i < $length_pids; i++ )); do
if ! ps ax | grep -v grep | grep ${pids[$i]} > /dev/null; then
count=`expr $count + 1`
fi
done
if [ $count -eq $unfinishedPids ]; then
unfinishedPids=0
fi
done
#remove all elements from pids
if [ $unfinishedPids -eq 0 ]; then
pids=() pids=()
length_pids=(${#pids[@]})
fi
fi fi
done done
@@ -339,9 +298,8 @@ restart_networks(){
if [ "$length_networks" == "0" ];then if [ "$length_networks" == "0" ];then
echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] No networks found \n" >>$LOGFILE echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] No networks found \n" >>$LOGFILE
else else
while [ $unfinishedPids -gt 0 ]; do # Wait for the remaining background processes to finish
sleep 10 wait
done
echo -e "Done restarting networks$inzone. \n" echo -e "Done restarting networks$inzone. \n"
echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Done restarting networks$inzone. \n" >>$LOGFILE echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Done restarting networks$inzone. \n" >>$LOGFILE
@@ -392,49 +350,29 @@ restart_vpc(){
restart_vpcs(){ restart_vpcs(){
vpcs=(`mysql -h $db --user=$user --password=$password --skip-column-names -U cloud -e "select uuid from vpc WHERE removed is null$zone"`) vpcs=(`mysql -h $db --user=$user --password=$password --skip-column-names -U cloud -e "select uuid from vpc WHERE removed is null$zone"`)
length_vpcs=(${#vpcs[@]}) length_vpcs=${#vpcs[@]}
echo -e "\nRestarting $length_vpcs vpcs... " echo -e "\nRestarting $length_vpcs vpcs... "
echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Restarting $length_vpcs vpcs... " >>$LOGFILE echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Restarting $length_vpcs vpcs... " >>$LOGFILE
#Spawn restart vpcs in parallel - run commands in <n> chunks - number of threads is configurable #Spawn restart vpcs in parallel - run commands in <n> chunks - number of threads is configurable
if [ $maxthreads -gt $length_vpcs ]; then
maxthreads=$length_vpcs
fi
pids=() pids=()
for d in "${vpcs[@]}"; do for d in "${vpcs[@]}"; do
restart_vpc $d & restart_vpc $d &
pids=( "${pids[@]}" $! ) pids=( "${pids[@]}" $! )
length_pids=${#pids[@]}
length_pids=(${#pids[@]})
unfinishedPids=(${#pids[@]})
if [ $maxthreads -gt $length_vpcs ]; then
maxthreads=$length_vpcs
fi
if [ $length_pids -ge $maxthreads ]; then if [ $length_pids -ge $maxthreads ]; then
while [ $unfinishedPids -gt 0 ]; do # Wait for $maxthreads number of processes to finish
sleep 10 wait
count=0 # Clear the pids array for the next batch
for (( i = 0 ; i < $length_pids; i++ )); do
if ! ps ax | grep -v grep | grep ${pids[$i]} > /dev/null; then
count=`expr $count + 1`
fi
done
if [ $count -eq $unfinishedPids ]; then
unfinishedPids=0
fi
done
#remove all elements from pids
if [ $unfinishedPids -eq 0 ]; then
pids=() pids=()
length_pids=(${#pids[@]})
fi
fi fi
done done
@@ -443,9 +381,8 @@ restart_vpcs(){
if [ "$length_vpcs" == "0" ];then if [ "$length_vpcs" == "0" ];then
echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] No vpcs found \n" >>$LOGFILE echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] No vpcs found \n" >>$LOGFILE
else else
while [ $unfinishedPids -gt 0 ]; do # Wait for the remaining background processes to finish
sleep 10 wait
done
echo -e "Done restarting vpcs$inzone. \n" echo -e "Done restarting vpcs$inzone. \n"
echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Done restarting vpcs$inzone. \n" >>$LOGFILE echo -e "[$(date "+%Y.%m.%d-%H.%M.%S")] Done restarting vpcs$inzone. \n" >>$LOGFILE