KVM: Enable HA heartbeat on ShareMountPoint (#12773)

This commit is contained in:
Wei Zhou 2026-04-10 10:42:40 +02:00 committed by GitHub
parent 2a60305792
commit e297644ce1
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
9 changed files with 253 additions and 22 deletions

View File

@ -21,6 +21,7 @@ import static org.apache.cloudstack.framework.config.ConfigKey.Scope.Cluster;
import com.cloud.deploy.DeploymentPlanner;
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.utils.component.Manager;
import com.cloud.vm.VMInstanceVO;
import org.apache.cloudstack.framework.config.ConfigKey;
@ -32,6 +33,8 @@ import java.util.List;
*/
public interface HighAvailabilityManager extends Manager {
List<StoragePoolType> LIBVIRT_STORAGE_POOL_TYPES_WITH_HA_SUPPORT = List.of(StoragePoolType.NetworkFilesystem, StoragePoolType.SharedMountPoint);
ConfigKey<Boolean> ForceHA = new ConfigKey<>("Advanced", Boolean.class, "force.ha", "false",
"Force High-Availability to happen even if the VM says no.", true, Cluster);

View File

@ -35,7 +35,6 @@ import com.cloud.agent.properties.AgentPropertiesFileHandler;
public class KVMHABase {
protected Logger logger = LogManager.getLogger(getClass());
private long _timeout = 60000; /* 1 minutes */
protected static String s_heartBeatPath;
protected long _heartBeatUpdateTimeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEARTBEAT_UPDATE_TIMEOUT);
protected long _heartBeatUpdateFreq = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_HEARTBEAT_UPDATE_FREQUENCY);
protected long _heartBeatUpdateMaxTries = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_HEARTBEAT_UPDATE_MAX_TRIES);

View File

@ -18,7 +18,7 @@ package com.cloud.hypervisor.kvm.resource;
import com.cloud.agent.properties.AgentProperties;
import com.cloud.agent.properties.AgentPropertiesFileHandler;
import com.cloud.storage.Storage.StoragePoolType;
import com.cloud.ha.HighAvailabilityManager;
import com.cloud.utils.script.Script;
import org.libvirt.Connect;
import org.libvirt.LibvirtException;
@ -39,20 +39,15 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
private final String hostPrivateIp;
public KVMHAMonitor(HAStoragePool pool, String host, String scriptPath) {
public KVMHAMonitor(HAStoragePool pool, String host) {
if (pool != null) {
storagePool.put(pool.getPoolUUID(), pool);
}
hostPrivateIp = host;
configureHeartBeatPath(scriptPath);
rebootHostAndAlertManagementOnHeartbeatTimeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.REBOOT_HOST_AND_ALERT_MANAGEMENT_ON_HEARTBEAT_TIMEOUT);
}
private static synchronized void configureHeartBeatPath(String scriptPath) {
KVMHABase.s_heartBeatPath = scriptPath;
}
public void addStoragePool(HAStoragePool pool) {
synchronized (storagePool) {
storagePool.put(pool.getPoolUUID(), pool);
@ -86,8 +81,8 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
Set<String> removedPools = new HashSet<>();
for (String uuid : storagePool.keySet()) {
HAStoragePool primaryStoragePool = storagePool.get(uuid);
if (primaryStoragePool.getPool().getType() == StoragePoolType.NetworkFilesystem) {
checkForNotExistingPools(removedPools, uuid);
if (HighAvailabilityManager.LIBVIRT_STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(primaryStoragePool.getPool().getType())) {
checkForNotExistingLibvirtStoragePools(removedPools, uuid);
if (removedPools.contains(uuid)) {
continue;
}
@ -127,7 +122,7 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
return result;
}
private void checkForNotExistingPools(Set<String> removedPools, String uuid) {
private void checkForNotExistingLibvirtStoragePools(Set<String> removedPools, String uuid) {
try {
Connect conn = LibvirtConnection.getConnection();
StoragePool storage = conn.storagePoolLookupByUUIDString(uuid);

View File

@ -1063,11 +1063,6 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
throw new ConfigurationException("Unable to find patch.sh");
}
heartBeatPath = Script.findScript(kvmScriptsDir, "kvmheartbeat.sh");
if (heartBeatPath == null) {
throw new ConfigurationException("Unable to find kvmheartbeat.sh");
}
createVmPath = Script.findScript(storageScriptsDir, "createvm.sh");
if (createVmPath == null) {
throw new ConfigurationException("Unable to find the createvm.sh");
@ -1330,7 +1325,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
final String[] info = NetUtils.getNetworkParams(privateNic);
kvmhaMonitor = new KVMHAMonitor(null, info[0], heartBeatPath);
kvmhaMonitor = new KVMHAMonitor(null, info[0]);
final Thread ha = new Thread(kvmhaMonitor);
ha.start();

View File

@ -48,7 +48,7 @@ public final class LibvirtCheckVMActivityOnStoragePoolCommandWrapper extends Com
KVMStoragePool primaryPool = storagePoolMgr.getStoragePool(pool.getType(), pool.getUuid());
if (primaryPool.isPoolSupportHA()){
if (primaryPool.isPoolSupportHA()) {
final HAStoragePool nfspool = monitor.getStoragePool(pool.getUuid());
final KVMHAVMActivityChecker ha = new KVMHAVMActivityChecker(nfspool, command.getHost(), command.getVolumeList(), libvirtComputingResource.getVmActivityCheckPath(), command.getSuspectTimeInSeconds());
final Future<Boolean> future = executors.submit(ha);

View File

@ -289,6 +289,7 @@ public class KVMStoragePoolManager {
if (pool instanceof LibvirtStoragePool) {
addPoolDetails(uuid, (LibvirtStoragePool) pool);
((LibvirtStoragePool) pool).setType(type);
}
return pool;
@ -390,6 +391,9 @@ public class KVMStoragePoolManager {
private synchronized KVMStoragePool createStoragePool(String name, String host, int port, String path, String userInfo, StoragePoolType type, Map<String, String> details, boolean primaryStorage) {
StorageAdaptor adaptor = getStorageAdaptor(type);
KVMStoragePool pool = adaptor.createStoragePool(name, host, port, path, userInfo, type, details, primaryStorage);
if (pool instanceof LibvirtStoragePool) {
((LibvirtStoragePool) pool).setType(type);
}
// LibvirtStorageAdaptor-specific statement
if (pool.isPoolSupportHA() && primaryStorage) {

View File

@ -31,6 +31,7 @@ import org.apache.cloudstack.utils.qemu.QemuImg.PhysicalDiskFormat;
import com.cloud.agent.api.to.HostTO;
import com.cloud.agent.properties.AgentProperties;
import com.cloud.agent.properties.AgentPropertiesFileHandler;
import com.cloud.ha.HighAvailabilityManager;
import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool;
import com.cloud.storage.Storage;
import com.cloud.storage.Storage.StoragePoolType;
@ -320,13 +321,24 @@ public class LibvirtStoragePool implements KVMStoragePool {
@Override
public boolean isPoolSupportHA() {
return type == StoragePoolType.NetworkFilesystem;
return HighAvailabilityManager.LIBVIRT_STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(type);
}
public String getHearthBeatPath() {
if (type == StoragePoolType.NetworkFilesystem) {
if (StoragePoolType.NetworkFilesystem.equals(type)) {
String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR);
return Script.findScript(kvmScriptsDir, "kvmheartbeat.sh");
String scriptPath = Script.findScript(kvmScriptsDir, "kvmheartbeat.sh");
if (scriptPath == null) {
throw new CloudRuntimeException("Unable to find heartbeat script 'kvmheartbeat.sh' in directory: " + kvmScriptsDir);
}
return scriptPath;
} else if (StoragePoolType.SharedMountPoint.equals(type)) {
String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR);
String scriptPath = Script.findScript(kvmScriptsDir, "kvmsmpheartbeat.sh");
if (scriptPath == null) {
throw new CloudRuntimeException("Unable to find heartbeat script 'kvmsmpheartbeat.sh' in directory: " + kvmScriptsDir);
}
return scriptPath;
}
return null;
}
@ -410,4 +422,8 @@ public class LibvirtStoragePool implements KVMStoragePool {
return true;
}
}
public void setType(StoragePoolType type) {
this.type = type;
}
}

View File

@ -27,6 +27,7 @@ import java.util.UUID;
import javax.inject.Inject;
import com.cloud.agent.api.to.DiskTO;
import com.cloud.ha.HighAvailabilityManager;
import com.cloud.storage.VolumeVO;
import org.apache.cloudstack.engine.orchestration.service.VolumeOrchestrationService;
import org.apache.cloudstack.engine.subsystem.api.storage.ChapInfo;
@ -587,7 +588,7 @@ public class CloudStackPrimaryDataStoreDriverImpl implements PrimaryDataStoreDri
@Override
public boolean isStorageSupportHA(StoragePoolType type) {
return StoragePoolType.NetworkFilesystem == type;
return type != null && HighAvailabilityManager.LIBVIRT_STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(type);
}
@Override

View File

@ -0,0 +1,218 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
help() {
printf "Usage: $0
-i identifier (required for CLI compatibility; value ignored by local-only heartbeat)
-p path (required for CLI compatibility; value ignored by local-only heartbeat)
-m mount point (local path where heartbeat will be written)
-h host (host IP/name to include in heartbeat filename)
-r write/read hb log (read-check mode)
-c cleanup (trigger emergency reboot)
-t interval between read hb log\n"
exit 1
}
#set -x
NfsSvrIP=
NfsSvrPath=
MountPoint=
HostIP=
interval=
rflag=0
cflag=0
while getopts 'i:p:m:h:t:rc' OPTION
do
case $OPTION in
i)
NfsSvrIP="$OPTARG"
;; # retained for CLI compatibility but unused for this script
p)
NfsSvrPath="$OPTARG"
;; # retained for CLI compatibility but unused for this script
m)
MountPoint="$OPTARG"
;;
h)
HostIP="$OPTARG"
;;
r)
rflag=1
;;
t)
interval="$OPTARG"
;;
c)
cflag=1
;;
*)
help
;;
esac
done
# For heartbeat we require a mountpoint
if [ -z "$MountPoint" ]
then
echo "Mount point (-m) is required"
help
fi
# Validate mount point exists, is (if possible) a mounted filesystem, and is writable
if [ ! -d "$MountPoint" ]; then
echo "Mount point directory does not exist: $MountPoint" >&2
exit 1
fi
# If the 'mountpoint' utility is available, ensure this is an actual mount
if command -v mountpoint >/dev/null 2>&1; then
if ! mountpoint -q "$MountPoint"; then
echo "Mount point is not a mounted filesystem: $MountPoint" >&2
exit 1
fi
fi
# Ensure the mount point is writable
if [ ! -w "$MountPoint" ]; then
echo "Mount point is not writable: $MountPoint" >&2
exit 1
fi
#delete VMs on this mountpoint (best-effort)
deleteVMs() {
local mountPoint=$1
# ensure it ends with a single trailing slash
mountPoint="${mountPoint%/}/"
vmPids=$(ps aux | grep qemu | grep "$mountPoint" | awk '{print $2}' 2> /dev/null)
if [ -z "$vmPids" ]
then
return
fi
for pid in $vmPids
do
kill -9 $pid &> /dev/null
done
}
#checking is there the mount point present under $MountPoint?
if grep -q "^[^ ]\+ $MountPoint " /proc/mounts
then
# mount exists; nothing to do here; keep for compatibility with original flow
:
else
# mount point not present
# if not in read-check mode, consider deleting VMs similar to original behavior
if [ "$rflag" == "0" ]
then
deleteVMs $MountPoint
fi
fi
hbFolder="$MountPoint/KVMHA"
hbFile="$hbFolder/hb-$HostIP"
write_hbLog() {
#write the heart beat log
stat "$hbFile" &> /dev/null
if [ $? -gt 0 ]
then
# create a new one
mkdir -p "$hbFolder" &> /dev/null
# touch will be done by atomic write below; ensure folder is writable
if [ ! -w "$hbFolder" ]; then
printf "Folder not writable: $hbFolder" >&2
return 2
fi
fi
timestamp=$(date +%s)
# Write atomically to avoid partial writes (write to tmp then mv)
tmpfile="${hbFile}.$$"
printf "%s\n" "$timestamp" > "$tmpfile" 2>/dev/null
if [ $? -ne 0 ]; then
printf "Failed to write heartbeat to $tmpfile" >&2
return 2
fi
mv -f "$tmpfile" "$hbFile" 2>/dev/null
return $?
}
check_hbLog() {
hb_diff=0
if [ ! -f "$hbFile" ]; then
# signal large difference if file missing
hb_diff=999999
return 1
fi
now=$(date +%s)
hb=$(cat "$hbFile" 2>/dev/null)
if [ -z "$hb" ]; then
hb_diff=999998
return 1
fi
diff=`expr $now - $hb 2>/dev/null`
if [ $? -ne 0 ]
then
hb_diff=999997
return 1
fi
if [ -z "$interval" ]; then
# if no interval provided, consider 0 as success
if [ $diff -gt 0 ]; then
hb_diff=$diff
return 1
else
hb_diff=0
return 0
fi
fi
if [ $diff -gt $interval ]
then
hb_diff=$diff
return 1
fi
hb_diff=0
return 0
}
if [ "$rflag" == "1" ]
then
check_hbLog
status=$?
diff="${hb_diff:-0}"
if [ $status -eq 0 ]
then
echo "=====> ALIVE <====="
else
echo "=====> Considering host as DEAD because last write on [$hbFile] was [$diff] seconds ago, but the max interval is [$interval] <======"
fi
exit 0
elif [ "$cflag" == "1" ]
then
/usr/bin/logger -t heartbeat "kvmsmpheartbeat.sh will reboot system because it was unable to write the heartbeat to the storage."
sync &
sleep 5
echo b > /proc/sysrq-trigger
exit $?
else
write_hbLog
exit $?
fi