mirror of https://github.com/apache/cloudstack.git
KVM: Enable HA heartbeat on ShareMountPoint (#12773)
This commit is contained in:
parent
2a60305792
commit
e297644ce1
|
|
@ -21,6 +21,7 @@ import static org.apache.cloudstack.framework.config.ConfigKey.Scope.Cluster;
|
|||
import com.cloud.deploy.DeploymentPlanner;
|
||||
import com.cloud.host.HostVO;
|
||||
import com.cloud.host.Status;
|
||||
import com.cloud.storage.Storage.StoragePoolType;
|
||||
import com.cloud.utils.component.Manager;
|
||||
import com.cloud.vm.VMInstanceVO;
|
||||
import org.apache.cloudstack.framework.config.ConfigKey;
|
||||
|
|
@ -32,6 +33,8 @@ import java.util.List;
|
|||
*/
|
||||
public interface HighAvailabilityManager extends Manager {
|
||||
|
||||
List<StoragePoolType> LIBVIRT_STORAGE_POOL_TYPES_WITH_HA_SUPPORT = List.of(StoragePoolType.NetworkFilesystem, StoragePoolType.SharedMountPoint);
|
||||
|
||||
ConfigKey<Boolean> ForceHA = new ConfigKey<>("Advanced", Boolean.class, "force.ha", "false",
|
||||
"Force High-Availability to happen even if the VM says no.", true, Cluster);
|
||||
|
||||
|
|
|
|||
|
|
@ -35,7 +35,6 @@ import com.cloud.agent.properties.AgentPropertiesFileHandler;
|
|||
public class KVMHABase {
|
||||
protected Logger logger = LogManager.getLogger(getClass());
|
||||
private long _timeout = 60000; /* 1 minutes */
|
||||
protected static String s_heartBeatPath;
|
||||
protected long _heartBeatUpdateTimeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.HEARTBEAT_UPDATE_TIMEOUT);
|
||||
protected long _heartBeatUpdateFreq = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_HEARTBEAT_UPDATE_FREQUENCY);
|
||||
protected long _heartBeatUpdateMaxTries = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_HEARTBEAT_UPDATE_MAX_TRIES);
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ package com.cloud.hypervisor.kvm.resource;
|
|||
|
||||
import com.cloud.agent.properties.AgentProperties;
|
||||
import com.cloud.agent.properties.AgentPropertiesFileHandler;
|
||||
import com.cloud.storage.Storage.StoragePoolType;
|
||||
import com.cloud.ha.HighAvailabilityManager;
|
||||
import com.cloud.utils.script.Script;
|
||||
import org.libvirt.Connect;
|
||||
import org.libvirt.LibvirtException;
|
||||
|
|
@ -39,20 +39,15 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
|
|||
|
||||
private final String hostPrivateIp;
|
||||
|
||||
public KVMHAMonitor(HAStoragePool pool, String host, String scriptPath) {
|
||||
public KVMHAMonitor(HAStoragePool pool, String host) {
|
||||
if (pool != null) {
|
||||
storagePool.put(pool.getPoolUUID(), pool);
|
||||
}
|
||||
hostPrivateIp = host;
|
||||
configureHeartBeatPath(scriptPath);
|
||||
|
||||
rebootHostAndAlertManagementOnHeartbeatTimeout = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.REBOOT_HOST_AND_ALERT_MANAGEMENT_ON_HEARTBEAT_TIMEOUT);
|
||||
}
|
||||
|
||||
private static synchronized void configureHeartBeatPath(String scriptPath) {
|
||||
KVMHABase.s_heartBeatPath = scriptPath;
|
||||
}
|
||||
|
||||
public void addStoragePool(HAStoragePool pool) {
|
||||
synchronized (storagePool) {
|
||||
storagePool.put(pool.getPoolUUID(), pool);
|
||||
|
|
@ -86,8 +81,8 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
|
|||
Set<String> removedPools = new HashSet<>();
|
||||
for (String uuid : storagePool.keySet()) {
|
||||
HAStoragePool primaryStoragePool = storagePool.get(uuid);
|
||||
if (primaryStoragePool.getPool().getType() == StoragePoolType.NetworkFilesystem) {
|
||||
checkForNotExistingPools(removedPools, uuid);
|
||||
if (HighAvailabilityManager.LIBVIRT_STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(primaryStoragePool.getPool().getType())) {
|
||||
checkForNotExistingLibvirtStoragePools(removedPools, uuid);
|
||||
if (removedPools.contains(uuid)) {
|
||||
continue;
|
||||
}
|
||||
|
|
@ -127,7 +122,7 @@ public class KVMHAMonitor extends KVMHABase implements Runnable {
|
|||
return result;
|
||||
}
|
||||
|
||||
private void checkForNotExistingPools(Set<String> removedPools, String uuid) {
|
||||
private void checkForNotExistingLibvirtStoragePools(Set<String> removedPools, String uuid) {
|
||||
try {
|
||||
Connect conn = LibvirtConnection.getConnection();
|
||||
StoragePool storage = conn.storagePoolLookupByUUIDString(uuid);
|
||||
|
|
|
|||
|
|
@ -1063,11 +1063,6 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
|
|||
throw new ConfigurationException("Unable to find patch.sh");
|
||||
}
|
||||
|
||||
heartBeatPath = Script.findScript(kvmScriptsDir, "kvmheartbeat.sh");
|
||||
if (heartBeatPath == null) {
|
||||
throw new ConfigurationException("Unable to find kvmheartbeat.sh");
|
||||
}
|
||||
|
||||
createVmPath = Script.findScript(storageScriptsDir, "createvm.sh");
|
||||
if (createVmPath == null) {
|
||||
throw new ConfigurationException("Unable to find the createvm.sh");
|
||||
|
|
@ -1330,7 +1325,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv
|
|||
|
||||
final String[] info = NetUtils.getNetworkParams(privateNic);
|
||||
|
||||
kvmhaMonitor = new KVMHAMonitor(null, info[0], heartBeatPath);
|
||||
kvmhaMonitor = new KVMHAMonitor(null, info[0]);
|
||||
final Thread ha = new Thread(kvmhaMonitor);
|
||||
ha.start();
|
||||
|
||||
|
|
|
|||
|
|
@ -48,7 +48,7 @@ public final class LibvirtCheckVMActivityOnStoragePoolCommandWrapper extends Com
|
|||
|
||||
KVMStoragePool primaryPool = storagePoolMgr.getStoragePool(pool.getType(), pool.getUuid());
|
||||
|
||||
if (primaryPool.isPoolSupportHA()){
|
||||
if (primaryPool.isPoolSupportHA()) {
|
||||
final HAStoragePool nfspool = monitor.getStoragePool(pool.getUuid());
|
||||
final KVMHAVMActivityChecker ha = new KVMHAVMActivityChecker(nfspool, command.getHost(), command.getVolumeList(), libvirtComputingResource.getVmActivityCheckPath(), command.getSuspectTimeInSeconds());
|
||||
final Future<Boolean> future = executors.submit(ha);
|
||||
|
|
|
|||
|
|
@ -289,6 +289,7 @@ public class KVMStoragePoolManager {
|
|||
|
||||
if (pool instanceof LibvirtStoragePool) {
|
||||
addPoolDetails(uuid, (LibvirtStoragePool) pool);
|
||||
((LibvirtStoragePool) pool).setType(type);
|
||||
}
|
||||
|
||||
return pool;
|
||||
|
|
@ -390,6 +391,9 @@ public class KVMStoragePoolManager {
|
|||
private synchronized KVMStoragePool createStoragePool(String name, String host, int port, String path, String userInfo, StoragePoolType type, Map<String, String> details, boolean primaryStorage) {
|
||||
StorageAdaptor adaptor = getStorageAdaptor(type);
|
||||
KVMStoragePool pool = adaptor.createStoragePool(name, host, port, path, userInfo, type, details, primaryStorage);
|
||||
if (pool instanceof LibvirtStoragePool) {
|
||||
((LibvirtStoragePool) pool).setType(type);
|
||||
}
|
||||
|
||||
// LibvirtStorageAdaptor-specific statement
|
||||
if (pool.isPoolSupportHA() && primaryStorage) {
|
||||
|
|
|
|||
|
|
@ -31,6 +31,7 @@ import org.apache.cloudstack.utils.qemu.QemuImg.PhysicalDiskFormat;
|
|||
import com.cloud.agent.api.to.HostTO;
|
||||
import com.cloud.agent.properties.AgentProperties;
|
||||
import com.cloud.agent.properties.AgentPropertiesFileHandler;
|
||||
import com.cloud.ha.HighAvailabilityManager;
|
||||
import com.cloud.hypervisor.kvm.resource.KVMHABase.HAStoragePool;
|
||||
import com.cloud.storage.Storage;
|
||||
import com.cloud.storage.Storage.StoragePoolType;
|
||||
|
|
@ -320,13 +321,24 @@ public class LibvirtStoragePool implements KVMStoragePool {
|
|||
|
||||
@Override
|
||||
public boolean isPoolSupportHA() {
|
||||
return type == StoragePoolType.NetworkFilesystem;
|
||||
return HighAvailabilityManager.LIBVIRT_STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(type);
|
||||
}
|
||||
|
||||
public String getHearthBeatPath() {
|
||||
if (type == StoragePoolType.NetworkFilesystem) {
|
||||
if (StoragePoolType.NetworkFilesystem.equals(type)) {
|
||||
String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR);
|
||||
return Script.findScript(kvmScriptsDir, "kvmheartbeat.sh");
|
||||
String scriptPath = Script.findScript(kvmScriptsDir, "kvmheartbeat.sh");
|
||||
if (scriptPath == null) {
|
||||
throw new CloudRuntimeException("Unable to find heartbeat script 'kvmheartbeat.sh' in directory: " + kvmScriptsDir);
|
||||
}
|
||||
return scriptPath;
|
||||
} else if (StoragePoolType.SharedMountPoint.equals(type)) {
|
||||
String kvmScriptsDir = AgentPropertiesFileHandler.getPropertyValue(AgentProperties.KVM_SCRIPTS_DIR);
|
||||
String scriptPath = Script.findScript(kvmScriptsDir, "kvmsmpheartbeat.sh");
|
||||
if (scriptPath == null) {
|
||||
throw new CloudRuntimeException("Unable to find heartbeat script 'kvmsmpheartbeat.sh' in directory: " + kvmScriptsDir);
|
||||
}
|
||||
return scriptPath;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
|
@ -410,4 +422,8 @@ public class LibvirtStoragePool implements KVMStoragePool {
|
|||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
public void setType(StoragePoolType type) {
|
||||
this.type = type;
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -27,6 +27,7 @@ import java.util.UUID;
|
|||
import javax.inject.Inject;
|
||||
|
||||
import com.cloud.agent.api.to.DiskTO;
|
||||
import com.cloud.ha.HighAvailabilityManager;
|
||||
import com.cloud.storage.VolumeVO;
|
||||
import org.apache.cloudstack.engine.orchestration.service.VolumeOrchestrationService;
|
||||
import org.apache.cloudstack.engine.subsystem.api.storage.ChapInfo;
|
||||
|
|
@ -587,7 +588,7 @@ public class CloudStackPrimaryDataStoreDriverImpl implements PrimaryDataStoreDri
|
|||
|
||||
@Override
|
||||
public boolean isStorageSupportHA(StoragePoolType type) {
|
||||
return StoragePoolType.NetworkFilesystem == type;
|
||||
return type != null && HighAvailabilityManager.LIBVIRT_STORAGE_POOL_TYPES_WITH_HA_SUPPORT.contains(type);
|
||||
}
|
||||
|
||||
@Override
|
||||
|
|
|
|||
|
|
@ -0,0 +1,218 @@
|
|||
#!/bin/bash
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# Print the command-line usage summary on stdout and terminate the script
# with exit status 1.
help() {
  # The script name is handed to printf as an argument rather than being
  # spliced into the format string, so a '%' or '\' in $0 cannot corrupt
  # the output.
  printf 'Usage: %s\n' "$0"
  # Quoted delimiter: the option list is emitted verbatim, without expansion.
  cat <<'EOF'
-i identifier (required for CLI compatibility; value ignored by local-only heartbeat)
-p path (required for CLI compatibility; value ignored by local-only heartbeat)
-m mount point (local path where heartbeat will be written)
-h host (host IP/name to include in heartbeat filename)
-r write/read hb log (read-check mode)
-c cleanup (trigger emergency reboot)
-t interval between read hb log
EOF
  exit 1
}
|
||||
|
||||
#set -x
|
||||
# Option state. NfsSvrIP and NfsSvrPath are only assigned here (from -i / -p);
# they are retained for CLI compatibility but unused by this script.
NfsSvrIP=""
NfsSvrPath=""
MountPoint=""
HostIP=""
interval=""
rflag=0
cflag=0

# -i, -p, -m, -h and -t take a value; -r and -c are plain switches.
# Anything getopts does not recognise prints the usage text and exits.
while getopts 'i:p:m:h:t:rc' OPTION; do
  case "$OPTION" in
    i) NfsSvrIP="$OPTARG" ;;
    p) NfsSvrPath="$OPTARG" ;;
    m) MountPoint="$OPTARG" ;;
    h) HostIP="$OPTARG" ;;
    t) interval="$OPTARG" ;;
    r) rflag=1 ;;
    c) cflag=1 ;;
    *) help ;;
  esac
done
|
||||
|
||||
# -m is mandatory. help prints the usage text and exits with status 1.
[ -n "$MountPoint" ] || {
  echo "Mount point (-m) is required"
  help
}

# The mount point has to be an existing directory.
[ -d "$MountPoint" ] || {
  echo "Mount point directory does not exist: $MountPoint" >&2
  exit 1
}

# Only when the 'mountpoint' utility is installed: require the directory to be
# an actual mounted filesystem.
if command -v mountpoint >/dev/null 2>&1 && ! mountpoint -q "$MountPoint"; then
  echo "Mount point is not a mounted filesystem: $MountPoint" >&2
  exit 1
fi

# The mount point itself must be writable by the current user.
[ -w "$MountPoint" ] || {
  echo "Mount point is not writable: $MountPoint" >&2
  exit 1
}
|
||||
# Best-effort: force-kill (SIGKILL) every process whose `ps aux` line contains
# "qemu" and a path below the given mount point.
#   $1 - mount point directory
# Failures of individual kill calls are silenced.
deleteVMs() {
  local mountPoint=$1
  local vmPids pid

  # An empty argument would become "/" below and match every qemu process
  # that references any absolute path, so refuse to act on it.
  if [ -z "$mountPoint" ]; then
    return
  fi

  # Strip one trailing slash (if present) and append one, so that a sibling
  # directory sharing the same name prefix is not matched.
  mountPoint="${mountPoint%/}/"

  # grep -F matches the path literally; without it characters such as '.' or
  # '[' in the path would be interpreted as a regular expression and could
  # select VMs that live on a different mount.
  vmPids=$(ps aux 2> /dev/null | grep qemu | grep -F -- "$mountPoint" | awk '{print $2}')

  if [ -z "$vmPids" ]; then
    return
  fi

  # $vmPids is intentionally unquoted: one PID per word.
  for pid in $vmPids; do
    kill -9 "$pid" &> /dev/null
  done
}
|
||||
|
||||
# Check whether /proc/mounts lists a filesystem mounted exactly at $MountPoint.
# If it does not and the script is not in read-check mode (-r), the VMs running
# from that path are killed via deleteVMs.

# /proc/mounts records mount targets without a trailing slash, so drop any
# trailing slashes from the user-supplied path before comparing (a bare "/"
# is kept as is).
mountTarget="$MountPoint"
while [ "${#mountTarget}" -gt 1 ] && [ "${mountTarget%/}" != "$mountTarget" ]; do
  mountTarget="${mountTarget%/}"
done

# Compare the second field (the mount target) as a literal string. Passing the
# path through the environment avoids treating it as a regular expression or
# having awk interpret backslashes in it; a regex match would report a mounted
# path containing e.g. '[' or a trailing '/' as missing and wrongly kill VMs.
if ! MOUNT_TARGET="$mountTarget" awk '$2 == ENVIRON["MOUNT_TARGET"] { found = 1; exit } END { exit !found }' /proc/mounts
then
  # mount point not present
  if [ "$rflag" == "0" ]; then
    deleteVMs "$MountPoint"
  fi
fi

# Heartbeat files live in a KVMHA folder on the mount point, one file per host.
hbFolder="$MountPoint/KVMHA"
hbFile="$hbFolder/hb-$HostIP"
|
||||
|
||||
# Write the current epoch time (seconds) into $hbFile.
# Reads the globals hbFile and hbFolder.
# Returns 0 on success, 2 when the heartbeat folder is not writable or the
# temporary file cannot be written, otherwise the exit status of the failed mv.
write_hbLog() {
  local timestamp tmpfile rc

  # First heartbeat for this host: make sure the folder exists and is writable.
  if [ ! -e "$hbFile" ]; then
    mkdir -p "$hbFolder" &> /dev/null
    if [ ! -w "$hbFolder" ]; then
      printf 'Folder not writable: %s\n' "$hbFolder" >&2
      return 2
    fi
  fi

  timestamp=$(date +%s)

  # Write to a PID-suffixed temporary file and rename it over the target, so a
  # reader never sees a partially written heartbeat.
  tmpfile="${hbFile}.$$"
  if ! { printf '%s\n' "$timestamp" > "$tmpfile"; } 2>/dev/null; then
    # Do not leave a stale temporary file next to the heartbeat files.
    rm -f "$tmpfile" 2>/dev/null
    printf 'Failed to write heartbeat to %s\n' "$tmpfile" >&2
    return 2
  fi

  mv -f "$tmpfile" "$hbFile" 2>/dev/null
  rc=$?
  if [ "$rc" -ne 0 ]; then
    rm -f "$tmpfile" 2>/dev/null
  fi
  return "$rc"
}
|
||||
|
||||
# Decide whether the heartbeat stored in $hbFile is recent enough.
# Reads the globals hbFile and interval; publishes the result in hb_diff.
# Returns 0 (hb_diff=0) when the heartbeat is fresh, 1 otherwise, with hb_diff:
#   999999  heartbeat file missing
#   999998  heartbeat file empty or unreadable
#   999997  heartbeat content is not a plain decimal timestamp
#   <age>   seconds since the last heartbeat, when it is too old
check_hbLog() {
  local now hb age

  hb_diff=0
  if [ ! -f "$hbFile" ]; then
    # signal large difference if file missing
    hb_diff=999999
    return 1
  fi

  now=$(date +%s)
  hb=$(cat "$hbFile" 2>/dev/null)
  if [ -z "$hb" ]; then
    hb_diff=999998
    return 1
  fi

  # Reject anything that is not purely decimal digits before doing arithmetic.
  case "$hb" in
    *[!0-9]*)
      hb_diff=999997
      return 1
      ;;
  esac

  # Shell arithmetic instead of `expr`: expr exits with status 1 whenever its
  # result is 0, so a heartbeat written within the current second used to be
  # mistaken for an arithmetic failure and reported as dead.
  # 10# forces base 10 so a leading zero is not parsed as octal.
  age=$(( now - 10#$hb ))

  if [ -z "$interval" ]; then
    # No interval given: only an age of zero seconds or less counts as alive.
    if [ "$age" -gt 0 ]; then
      hb_diff=$age
      return 1
    fi
    hb_diff=0
    return 0
  fi

  # NOTE(review): a non-numeric interval makes this test fail with an error and
  # the heartbeat is then reported as alive - confirm callers always pass digits.
  if [ "$age" -gt "$interval" ]; then
    hb_diff=$age
    return 1
  fi

  hb_diff=0
  return 0
}
|
||||
|
||||
# Mode dispatch. -r (read-check) takes precedence over -c (cleanup); with
# neither flag the script writes a heartbeat.

# Read-check mode: report ALIVE or DEAD on stdout; the exit status is always 0.
if [ "$rflag" == "1" ]; then
  if check_hbLog; then
    echo "=====> ALIVE <====="
  else
    diff="${hb_diff:-0}"
    echo "=====> Considering host as DEAD because last write on [$hbFile] was [$diff] seconds ago, but the max interval is [$interval] <======"
  fi
  exit 0
fi

# Cleanup mode: log the reason, start a background sync, wait five seconds,
# then write 'b' to /proc/sysrq-trigger (the reboot announced in the log line).
if [ "$cflag" == "1" ]; then
  /usr/bin/logger -t heartbeat "kvmsmpheartbeat.sh will reboot system because it was unable to write the heartbeat to the storage."
  sync &
  sleep 5
  echo b > /proc/sysrq-trigger
  exit $?
fi

# Default mode: write the heartbeat and exit with write_hbLog's status.
write_hbLog
exit $?
|
||||
Loading…
Reference in New Issue