mirror of https://github.com/apache/cloudstack.git
Merge f5f3db5ea3 into 5893ba5a8c
This commit is contained in:
commit
25c553532f
|
|
@ -310,6 +310,31 @@ iscsi.session.cleanup.enabled=false
|
|||
# This parameter specifies if the host must be rebooted when something goes wrong with the heartbeat.
|
||||
#reboot.host.and.alert.management.on.heartbeat.timeout=true
|
||||
|
||||
# Action taken by kvmheartbeat.sh / kvmspheartbeat.sh when a storage heartbeat
|
||||
# write fails persistently. Supersedes the legacy boolean property
|
||||
# 'reboot.host.and.alert.management.on.heartbeat.timeout' when set to a non-default value.
|
||||
#
|
||||
# Allowed values:
|
||||
# hard-reboot - immediate sysrq-trigger reboot (default; 'reboot' kept as alias; any unrecognized value is treated the same way).
|
||||
# Required default for setups where a stale NFSv3 mount can prevent
|
||||
# a graceful shutdown from completing.
|
||||
# graceful-reboot - 'systemctl reboot' instead of sysrq; allows VMs to stop cleanly.
|
||||
# Use only if a stale storage mount cannot block shutdown.
|
||||
# restart-agent - restart cloudstack-agent only; running VMs are preserved.
|
||||
# log-only - log + alert; take no automatic action (admin must investigate).
|
||||
# custom - invoke the script at 'kvm.heartbeat.fence.custom.script' (see below).
|
||||
# Script is called with one positional arg: the heartbeat script name
|
||||
# (e.g. 'kvmheartbeat.sh'). Falls back to hard-reboot if missing or
|
||||
# not executable.
|
||||
#
|
||||
# The non-default values are recommended for setups using LINSTOR/DRBD or any local
|
||||
# storage with replication, where transient I/O contention can cause a heartbeat
|
||||
# write to time out without the host actually being unhealthy.
|
||||
#kvm.heartbeat.fence.action=hard-reboot
|
||||
|
||||
# Path to the operator-supplied script invoked when kvm.heartbeat.fence.action=custom.
|
||||
#kvm.heartbeat.fence.custom.script=/etc/cloudstack/agent/heartbeat-fence-custom.sh
|
||||
|
||||
# Enables manually setting CPU's topology on KVM's VM.
|
||||
#enable.manually.setting.cpu.topology.on.kvm.vm=true
|
||||
|
||||
|
|
|
|||
|
|
@ -598,6 +598,44 @@ public class AgentProperties{
|
|||
public static final Property<Boolean> REBOOT_HOST_AND_ALERT_MANAGEMENT_ON_HEARTBEAT_TIMEOUT
|
||||
= new Property<>("reboot.host.and.alert.management.on.heartbeat.timeout", true);
|
||||
|
||||
/**
|
||||
* Action taken by the KVM agent's storage heartbeat scripts (kvmheartbeat.sh / kvmspheartbeat.sh)
|
||||
* when a heartbeat write fails persistently. Allowed values:
|
||||
* <ul>
|
||||
* <li>{@code hard-reboot} (default; {@code reboot} accepted as alias) — immediate
|
||||
* sysrq-trigger reboot. Required default for setups where a stale NFSv3 mount can
|
||||
* prevent a graceful shutdown from completing.</li>
|
||||
* <li>{@code graceful-reboot} — {@code systemctl reboot} instead of sysrq; allows VMs
|
||||
* to stop cleanly. Use only if a stale storage mount cannot block shutdown.</li>
|
||||
* <li>{@code restart-agent} — restart cloudstack-agent only; running VMs preserved.</li>
|
||||
* <li>{@code log-only} — log + alert; take no automatic action (admin must investigate).</li>
|
||||
* <li>{@code custom} — invoke the script at {@link #KVM_HEARTBEAT_FENCE_CUSTOM_SCRIPT}
|
||||
* (default {@code /etc/cloudstack/agent/heartbeat-fence-custom.sh}). The script is
|
||||
* called with one argument: the heartbeat script name (e.g. {@code kvmheartbeat.sh}).
|
||||
* If the script is missing or not executable, falls back to {@code hard-reboot}.</li>
|
||||
* </ul>
|
||||
* The non-default values are recommended for setups using LINSTOR/DRBD or other replicated
|
||||
* local storage, where transient I/O contention can cause a heartbeat write to time out
|
||||
* without the host actually being unhealthy.<br>
|
||||
* Read by the heartbeat shell scripts directly from agent.properties.<br>
|
||||
* Data type: String.<br>
|
||||
* Any unrecognized value is treated as {@code hard-reboot}.<br>
* Default value: {@code hard-reboot}
|
||||
*/
|
||||
public static final Property<String> KVM_HEARTBEAT_FENCE_ACTION
|
||||
= new Property<>("kvm.heartbeat.fence.action", "hard-reboot");
|
||||
|
||||
/**
|
||||
* Path to the operator-supplied script invoked when
|
||||
* {@link #KVM_HEARTBEAT_FENCE_ACTION} is set to {@code custom}. The script must be
|
||||
* executable and is called with a single positional argument: the heartbeat script name
|
||||
* that triggered the fence (e.g. {@code kvmheartbeat.sh}). Read by the heartbeat shell
|
||||
* scripts directly from agent.properties.<br>
|
||||
* Data type: String.<br>
|
||||
* Default value: {@code /etc/cloudstack/agent/heartbeat-fence-custom.sh}
|
||||
*/
|
||||
public static final Property<String> KVM_HEARTBEAT_FENCE_CUSTOM_SCRIPT
|
||||
= new Property<>("kvm.heartbeat.fence.custom.script", "/etc/cloudstack/agent/heartbeat-fence-custom.sh");
|
||||
|
||||
/**
|
||||
* Enables manually setting CPU's topology on KVM's VM. <br>
|
||||
* Data type: Boolean.<br>
|
||||
|
|
|
|||
|
|
@ -0,0 +1,85 @@
|
|||
#!/bin/bash
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
#
|
||||
# Shared fence-action helper for kvmheartbeat.sh and kvmspheartbeat.sh.
|
||||
# Sourced by both scripts; do not invoke directly.
|
||||
#
|
||||
# Usage from caller:
|
||||
# source "$(dirname "$0")/kvmha-fence.sh"
|
||||
# fence_action "kvmheartbeat.sh" # script name is used as the log prefix and passed to the custom script
|
||||
|
||||
AGENT_PROPS="${AGENT_PROPS:-/etc/cloudstack/agent/agent.properties}"
|
||||
|
||||
# Private helper: immediate, non-graceful reboot through the kernel sysrq trigger.
# Shared by the 'hard-reboot' action, the unrecognized-value fallback and the
# fallback taken when the 'custom' script is unusable.
# Never returns: exits with the status of the write to /proc/sysrq-trigger.
_fence_hard_reboot() {
    # sync runs in the background and is given a fixed 5 s head start;
    # presumably so that a flush blocked on the failed storage cannot delay the reboot.
    sync &
    sleep 5
    echo b > /proc/sysrq-trigger
    exit $?
}

# Carries out the fence action configured in $AGENT_PROPS after a persistent
# heartbeat write failure.
#
# Arguments:
#   $1 - name of the calling heartbeat script (defaults to "kvmha"); used as the
#        log message prefix and passed as the only argument to the custom script.
#
# Configuration read from $AGENT_PROPS (last uncommented occurrence wins):
#   kvm.heartbeat.fence.action         - log-only | restart-agent | graceful-reboot |
#                                        custom | hard-reboot | reboot
#   kvm.heartbeat.fence.custom.script  - path used when the action is 'custom'
# If the file is unreadable or a key is absent/empty, the built-in defaults
# (hard-reboot, /etc/cloudstack/agent/heartbeat-fence-custom.sh) apply.
#
# Never returns to the caller: every branch ends the calling shell with 'exit'.
fence_action() {
    local source_script="${1:-kvmha}"
    local FENCE_ACTION="hard-reboot"
    local CUSTOM_SCRIPT="/etc/cloudstack/agent/heartbeat-fence-custom.sh"

    if [ -r "$AGENT_PROPS" ]; then
        # All whitespace is removed from the action value: valid actions contain none.
        local val
        val=$(grep -E '^[[:space:]]*kvm\.heartbeat\.fence\.action[[:space:]]*=' "$AGENT_PROPS" | tail -n 1 | cut -d= -f2- | tr -d '[:space:]')
        if [ -n "$val" ]; then
            FENCE_ACTION="$val"
        fi

        # Only leading/trailing whitespace is trimmed from the path, so a path
        # containing inner spaces survives.
        local cval
        cval=$(grep -E '^[[:space:]]*kvm\.heartbeat\.fence\.custom\.script[[:space:]]*=' "$AGENT_PROPS" | tail -n 1 | cut -d= -f2- | sed 's/^[[:space:]]*//;s/[[:space:]]*$//')
        if [ -n "$cval" ]; then
            CUSTOM_SCRIPT="$cval"
        fi
    fi

    case "$FENCE_ACTION" in
        log-only)
            /usr/bin/logger -t heartbeat "${source_script}: heartbeat write to storage failed; fence action 'log-only' selected — taking no automatic action. Operator must investigate."
            exit 0
            ;;
        restart-agent)
            /usr/bin/logger -t heartbeat "${source_script}: heartbeat write to storage failed; fence action 'restart-agent' — restarting cloudstack-agent (running VMs preserved)."
            sync &
            sleep 2
            systemctl restart cloudstack-agent
            exit $?
            ;;
        graceful-reboot)
            /usr/bin/logger -t heartbeat "${source_script}: heartbeat write to storage failed; fence action 'graceful-reboot' — rebooting via systemctl (allows running VMs to stop cleanly)."
            sync &
            sleep 5
            systemctl reboot
            exit $?
            ;;
        custom)
            # -x alone is also true for a searchable directory; require a regular
            # file so that a mis-pointed path takes the documented fallback instead
            # of failing to exec and leaving the host unfenced.
            if [ -f "$CUSTOM_SCRIPT" ] && [ -x "$CUSTOM_SCRIPT" ]; then
                /usr/bin/logger -t heartbeat "${source_script}: heartbeat write to storage failed; fence action 'custom' — running ${CUSTOM_SCRIPT}."
                sync &
                sleep 2
                "$CUSTOM_SCRIPT" "$source_script"
                exit $?
            else
                /usr/bin/logger -t heartbeat "${source_script}: heartbeat write to storage failed; fence action 'custom' selected but ${CUSTOM_SCRIPT} is missing or not executable — falling back to hard-reboot."
                _fence_hard_reboot
            fi
            ;;
        hard-reboot|reboot)
            # 'reboot' kept as alias for back-compat with pre-existing deployments.
            /usr/bin/logger -t heartbeat "${source_script} will reboot system because it was unable to write the heartbeat to the storage."
            _fence_hard_reboot
            ;;
        *)
            # Unknown value (e.g. a typo): keep the fail-safe hard reboot, but record
            # the rejected value so the operator can find the misconfiguration.
            /usr/bin/logger -t heartbeat "${source_script}: unrecognized fence action '${FENCE_ACTION}' in ${AGENT_PROPS} — falling back to hard-reboot."
            /usr/bin/logger -t heartbeat "${source_script} will reboot system because it was unable to write the heartbeat to the storage."
            _fence_hard_reboot
            ;;
    esac
}
|
||||
|
|
@ -156,11 +156,9 @@ then
|
|||
exit 0
|
||||
elif [ "$cflag" == "1" ]
|
||||
then
|
||||
/usr/bin/logger -t heartbeat "kvmheartbeat.sh will reboot system because it was unable to write the heartbeat to the storage."
|
||||
sync &
|
||||
sleep 5
|
||||
echo b > /proc/sysrq-trigger
|
||||
exit $?
|
||||
# shellcheck disable=SC1091
|
||||
. "$(dirname "$0")/kvmha-fence.sh"
|
||||
fence_action "kvmheartbeat.sh"
|
||||
else
|
||||
write_hbLog
|
||||
exit $?
|
||||
|
|
|
|||
|
|
@ -58,9 +58,7 @@ deleteVMs() {
|
|||
|
||||
if [ "$cflag" == "1" ]
|
||||
then
|
||||
/usr/bin/logger -t heartbeat "kvmspheartbeat.sh will reboot system because it was unable to write the heartbeat to the storage."
|
||||
sync &
|
||||
sleep 5
|
||||
echo b > /proc/sysrq-trigger
|
||||
exit $?
|
||||
# shellcheck disable=SC1091
|
||||
. "$(dirname "$0")/kvmha-fence.sh"
|
||||
fence_action "kvmspheartbeat.sh"
|
||||
fi
|
||||
|
|
|
|||
Loading…
Reference in New Issue