From 53e9b18ed0acb9d19d6ee43ef8a2fa8fd971c7ab Mon Sep 17 00:00:00 2001 From: Pearl Dsilva Date: Wed, 6 May 2026 11:37:01 -0400 Subject: [PATCH] improve lock host retrieval logic and quicker retrival using db host as first check point and then fanning out --- .../clvm/command/ClvmLockTransferAnswer.java | 31 ++- ...LibvirtClvmLockTransferCommandWrapper.java | 33 +-- .../cloud/storage/clvm/ClvmPoolManager.java | 204 +++++++++-------- .../cloud/storage/ClvmPoolManagerTest.java | 208 ++++++++++++++---- 4 files changed, 317 insertions(+), 159 deletions(-) diff --git a/core/src/main/java/org/apache/cloudstack/storage/clvm/command/ClvmLockTransferAnswer.java b/core/src/main/java/org/apache/cloudstack/storage/clvm/command/ClvmLockTransferAnswer.java index cb6809f3d94..f3c43c400b2 100644 --- a/core/src/main/java/org/apache/cloudstack/storage/clvm/command/ClvmLockTransferAnswer.java +++ b/core/src/main/java/org/apache/cloudstack/storage/clvm/command/ClvmLockTransferAnswer.java @@ -27,7 +27,7 @@ public class ClvmLockTransferAnswer extends Answer { private String currentLockHostname; private boolean isActive; - private boolean isExclusive; + private boolean isOpen; private String lvAttributes; public ClvmLockTransferAnswer(ClvmLockTransferCommand cmd, boolean result, String details) { @@ -35,20 +35,18 @@ public class ClvmLockTransferAnswer extends Answer { } public ClvmLockTransferAnswer(ClvmLockTransferCommand cmd, boolean result, String details, - String currentLockHostname, boolean isActive, boolean isExclusive, + String currentLockHostname, boolean isActive, boolean isOpen, String lvAttributes) { super(cmd, result, details); this.currentLockHostname = currentLockHostname; this.isActive = isActive; - this.isExclusive = isExclusive; + this.isOpen = isOpen; this.lvAttributes = lvAttributes; } /** - * Get the hostname of the host currently holding the lock (if any). - * This is parsed from the LVM "lv_host" field. - * - * @return hostname or null if no lock is held + * Get the hostname from lv_host. Retained for diagnostics only — + * do NOT use this to determine lock holder identity. */ public String getCurrentLockHostname() { return currentLockHostname; @@ -59,9 +57,8 @@ public class ClvmLockTransferAnswer extends Answer { } /** - * Whether the volume is currently active on any host. - * - * @return true if active, false otherwise + * Whether the LV is locally active on the queried host (lv_attr[4]=='a'). + * This is the authoritative signal for lock holder discovery via fan-out. */ public boolean isActive() { return isActive; @@ -72,17 +69,15 @@ public class ClvmLockTransferAnswer extends Answer { } /** - * Whether the lock is exclusive (as opposed to shared). - * Only meaningful if isActive() is true. - * - * @return true if exclusive lock, false if shared + * Whether a process has the device file open on the queried host (lv_attr[5]=='o'). + * true means a VM is actively doing I/O on this host right now — do NOT deactivate. */ - public boolean isExclusive() { - return isExclusive; + public boolean isOpen() { + return isOpen; } - public void setExclusive(boolean exclusive) { - isExclusive = exclusive; + public void setOpen(boolean open) { + isOpen = open; } public String getLvAttributes() { diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtClvmLockTransferCommandWrapper.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtClvmLockTransferCommandWrapper.java index f2bdba3a83e..327db619e37 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtClvmLockTransferCommandWrapper.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/wrapper/LibvirtClvmLockTransferCommandWrapper.java @@ -96,13 +96,15 @@ public class LibvirtClvmLockTransferCommandWrapper } /** - * Query which host currently holds the CLVM lock for a volume. - * Executes: lvs -o lv_attr,lv_host --noheadings + * Query whether this host currently has the CLVM LV activated locally. + * Executes: lvs -o lv_attr,lv_host,lv_active --noheadings * - * This queries the actual CLVM lock state (source of truth). - * The lv_host attribute shows which host currently has the volume activated. - * - * @return ClvmLockTransferAnswer with lock holder hostname + * lv_attr[4]=='a' (isActive) is LOCAL and is the authoritative signal — true only on + * the host where the LV is currently activated. The management server fans out this + * query to all cluster hosts; the one returning isActive=true is the lock holder. + * lv_attr[5]=='o' (isOpen) means a VM has the device open on this host (doing I/O). + * lv_host is retained for diagnostic logging only — do NOT use it to identify the + * lock holder. */ private Answer handleQueryLockState(ClvmLockTransferCommand cmd, String lvPath, String volumeUuid) { try { @@ -121,14 +123,11 @@ public class LibvirtClvmLockTransferCommandWrapper String.format("lvs command failed: %s", result)); } - // We need to find the line that contains the actual lv_attr (starts with '-' or other attr chars) String[] lines = parser.getLines().split("\n"); String dataLine = null; for (String line : lines) { String trimmed = line.trim(); - // Skip empty lines and warning messages - // lv_attr always starts with '-', 'w', 'r', etc. and is at least 10 characters if (!trimmed.isEmpty() && trimmed.length() >= 10 && (trimmed.charAt(0) == '-' || trimmed.charAt(0) == 'w' || @@ -157,18 +156,21 @@ public class LibvirtClvmLockTransferCommandWrapper } String lvAttr = parts[0]; + // lv_host: for diagnostics only, unreliable for lock-holder identification String hostname = parts.length > 1 ? parts[1] : null; + // lv_attr[4]=='a' → LV is active on THIS host (local activation state) boolean isActive = lvAttr.length() > 4 && lvAttr.charAt(4) == 'a'; - boolean isExclusive = lvAttr.length() > 5 && lvAttr.charAt(5) == 'e'; + // lv_attr[5]=='o' → a process has the device file open on this host (VM doing I/O) + boolean isOpen = lvAttr.length() > 5 && lvAttr.charAt(5) == 'o'; - logger.info("Queried lock state for volume {}: attr={}, hostname={}, active={}, exclusive={}", - volumeUuid, lvAttr, hostname, isActive, isExclusive); + logger.info("Queried lock state for volume {}: attr={}, hostname={}, active={}, open={}", + volumeUuid, lvAttr, hostname, isActive, isOpen); return new ClvmLockTransferAnswer(cmd, true, - String.format("Lock state: active=%s, exclusive=%s, host=%s", - isActive, isExclusive, hostname != null ? hostname : "none"), - hostname, isActive, isExclusive, lvAttr); + String.format("Lock state: active=%s, open=%s, host=%s", + isActive, isOpen, hostname != null ? hostname : "none"), + hostname, isActive, isOpen, lvAttr); } catch (Exception e) { logger.error("Exception during lock state query for volume {}: {}", @@ -176,4 +178,5 @@ public class LibvirtClvmLockTransferCommandWrapper return new ClvmLockTransferAnswer(cmd, false, "Exception: " + e.getMessage()); } } + } diff --git a/server/src/main/java/com/cloud/storage/clvm/ClvmPoolManager.java b/server/src/main/java/com/cloud/storage/clvm/ClvmPoolManager.java index e5ce47bdee3..fba3deeecc2 100644 --- a/server/src/main/java/com/cloud/storage/clvm/ClvmPoolManager.java +++ b/server/src/main/java/com/cloud/storage/clvm/ClvmPoolManager.java @@ -18,6 +18,7 @@ */ package com.cloud.storage.clvm; +import java.util.ArrayList; import java.util.Arrays; import java.util.List; import javax.inject.Inject; @@ -151,105 +152,129 @@ public class ClvmPoolManager implements Configurable { return null; } - List hosts = null; + String vgName = pool.getPath(); + if (vgName.startsWith("/")) { + vgName = vgName.substring(1); + } + String lvPath = String.format("/dev/%s/%s", vgName, volumePath); + // Fast path: trust the DB record and verify with a single host query + Long dbHostId = getClvmLockHostId(volumeId, volumeUuid); + if (dbHostId != null) { + HostVO dbHost = _hostDao.findById(dbHostId); + if (dbHost != null && dbHost.getStatus() == Status.Up + && dbHost.getHypervisorType() == Hypervisor.HypervisorType.KVM) { + Boolean active = querySingleHostLockState(dbHostId, lvPath, volumeUuid); + if (Boolean.TRUE.equals(active)) { + logger.debug("Fast path: volume {} confirmed active on DB host {}", volumeUuid, dbHostId); + return dbHostId; + } + logger.info("Fast path miss: volume {} not active on DB host {} — falling back to full fan-out", + volumeUuid, dbHostId); + } else { + logger.info("Fast path skip: DB host {} for volume {} is down/missing — falling back to full fan-out", + dbHostId, volumeUuid); + } + } + + List hosts = null; Long clusterId = pool.getClusterId(); if (clusterId != null) { hosts = _hostDao.findByClusterId(clusterId, Host.Type.Routing); - logger.debug("Found {} routing hosts in cluster {} for pool {}", - hosts != null ? hosts.size() : 0, clusterId, pool.getName()); } - if ((hosts == null || hosts.isEmpty()) && pool.getDataCenterId() > 0) { - logger.debug("Pool {} is zone-scoped or no hosts in cluster, checking zone {} for available routing hosts", - pool.getName(), pool.getDataCenterId()); hosts = _hostDao.findByDataCenterId(pool.getDataCenterId()); } - if (hosts == null || hosts.isEmpty()) { logger.warn("No KVM routing hosts found to query CLVM lock state for volume {} (pool: {}, cluster: {}, zone: {})", volumeUuid, pool.getName(), clusterId, pool.getDataCenterId()); return null; } - logger.debug("Querying lock state for volume {} from {} available hosts", volumeUuid, hosts.size()); + List activeHostIds = new ArrayList<>(); for (HostVO host : hosts) { if (host.getStatus() != Status.Up || - host.getType() != com.cloud.host.Host.Type.Routing || + host.getType() != Host.Type.Routing || host.getHypervisorType() != Hypervisor.HypervisorType.KVM) { continue; } - - String vgName = pool.getPath(); - if (vgName.startsWith("/")) { - vgName = vgName.substring(1); + // Skip the DB host, already confirmed inactive in the fast path above + if (dbHostId != null && host.getId() == dbHostId) { + continue; } - String lvPath = String.format("/dev/%s/%s", vgName, volumePath); - try { - ClvmLockTransferCommand queryCmd = new ClvmLockTransferCommand( - ClvmLockTransferCommand.Operation.QUERY_LOCK_STATE, - lvPath, - volumeUuid - ); - - Answer answer = _agentMgr.send(host.getId(), queryCmd); - - if (answer == null || !answer.getResult()) { - logger.debug("Failed to query lock state from host {}: {}", - host.getId(), answer != null ? answer.getDetails() : "null answer"); - continue; - } - - if (!(answer instanceof ClvmLockTransferAnswer)) { - logger.warn("Unexpected answer type for query lock state: {}", answer.getClass()); - continue; - } - - ClvmLockTransferAnswer queryAnswer = (ClvmLockTransferAnswer) answer; - String hostname = queryAnswer.getCurrentLockHostname(); - - if (hostname == null || hostname.isEmpty()) { - logger.debug("Volume {} is not locked (no exclusive lock held)", volumeUuid); - if (updateDatabase) { - VolumeDetailVO detail = _volsDetailsDao.findDetail(volumeId, CLVM_LOCK_HOST_ID); - if (detail != null) { - _volsDetailsDao.remove(detail.getId()); - } - } - return null; - } - - HostVO lockHost = _hostDao.findByName(hostname); - if (lockHost == null) { - logger.warn("Could not resolve hostname {} to host ID for volume {}", - hostname, volumeUuid); - return null; - } - - Long lockHostId = lockHost.getId(); - logger.info("Queried CLVM lock state for volume {}: locked by host {} ({}), exclusive={}", - volumeUuid, lockHostId, hostname, queryAnswer.isExclusive()); - - if (updateDatabase) { - Long dbHostId = getClvmLockHostId(volumeId, volumeUuid); - if (dbHostId == null || !dbHostId.equals(lockHostId)) { - logger.info("Correcting database: volume {} lock host: {} -> {} (actual)", - volumeUuid, dbHostId, lockHostId); - setClvmLockHostId(volumeId, lockHostId); - } - } - - return lockHostId; - - } catch (AgentUnavailableException | OperationTimedoutException e) { - logger.debug("Could not query host {} for lock state: {}", host.getId(), e.getMessage()); + Boolean active = querySingleHostLockState(host.getId(), lvPath, volumeUuid); + if (Boolean.TRUE.equals(active)) { + logger.debug("Volume {} is locally active on host {} (fan-out)", volumeUuid, host.getId()); + activeHostIds.add(host.getId()); } } - logger.warn("Could not query CLVM lock state for volume {} from any host", volumeUuid); - return null; + if (activeHostIds.isEmpty()) { + logger.debug("Volume {} is not active on any reachable host — no exclusive lock held", volumeUuid); + if (updateDatabase && dbHostId != null) { + VolumeDetailVO detail = _volsDetailsDao.findDetail(volumeId, CLVM_LOCK_HOST_ID); + if (detail != null) { + _volsDetailsDao.remove(detail.getId()); + } + } + return null; + } + + if (activeHostIds.size() > 1) { + logger.warn("Volume {} is active on {} hosts {}, shared-mode LV (template?). " + + "Skipping exclusive lock transfer.", + volumeUuid, activeHostIds.size(), activeHostIds); + return null; + } + + Long lockHostId = activeHostIds.get(0); + logger.info("Volume {} is exclusively active on host {} (found via fan-out, DB had {})", + volumeUuid, lockHostId, dbHostId); + + if (updateDatabase) { + if (dbHostId == null || !dbHostId.equals(lockHostId)) { + logger.info("Correcting database: volume {} lock host: {} -> {} (actual)", + volumeUuid, dbHostId, lockHostId); + setClvmLockHostId(volumeId, lockHostId); + } + } + + return lockHostId; + } + + /** + * Queries a single host for the CLVM LV activation state. + * + * @return {@code Boolean.TRUE} if the LV is active on that host, + * {@code Boolean.FALSE} if reachable but inactive, + * {@code null} if the host is unreachable or returned an error + */ + private Boolean querySingleHostLockState(Long hostId, String lvPath, String volumeUuid) { + try { + ClvmLockTransferCommand queryCmd = new ClvmLockTransferCommand( + ClvmLockTransferCommand.Operation.QUERY_LOCK_STATE, lvPath, volumeUuid); + Answer answer = _agentMgr.send(hostId, queryCmd); + + if (answer == null || !answer.getResult()) { + logger.debug("Failed to query lock state from host {}: {}", + hostId, answer != null ? answer.getDetails() : "null answer"); + return null; + } + if (!(answer instanceof ClvmLockTransferAnswer)) { + logger.warn("Unexpected answer type from host {} for QUERY_LOCK_STATE: {}", + hostId, answer.getClass()); + return null; + } + ClvmLockTransferAnswer queryAnswer = (ClvmLockTransferAnswer) answer; + logger.debug("Host {} reports volume {} active={} (attr={})", + hostId, volumeUuid, queryAnswer.isActive(), queryAnswer.getLvAttributes()); + return queryAnswer.isActive(); + } catch (AgentUnavailableException | OperationTimedoutException e) { + logger.debug("Could not query host {} for lock state: {}", hostId, e.getMessage()); + return null; + } } /** @@ -271,6 +296,17 @@ public class ClvmPoolManager implements Configurable { } } + /** + * Transfers the CLVM exclusive lock for a volume from the source host to the destination host. + * + * @param volumeUuid The volume UUID + * @param volumeId The volume DB ID + * @param volumePath The LV name within the VG (e.g. "vm-123-disk-0") + * @param pool The storage pool + * @param sourceHostId The host currently holding the lock (pre-validated by caller) + * @param destHostId The host that should hold the lock after transfer + * @return true if the lock was successfully transferred and activated on the destination + */ public boolean transferClvmVolumeLock(String volumeUuid, Long volumeId, String volumePath, StoragePool pool, Long sourceHostId, Long destHostId) { if (pool == null) { @@ -286,21 +322,17 @@ public class ClvmPoolManager implements Configurable { String lvPath = String.format("/dev/%s/%s", vgName, volumePath); try { - Long actualLockHostId = queryCurrentLockHolder(volumeId, volumeUuid, volumePath, pool, false); + // sourceHostId is trusted as pre-validated by the caller + Long hostToDeactivate = sourceHostId; - Long hostToDeactivate = actualLockHostId != null ? actualLockHostId : sourceHostId; - - logger.info("Transferring CLVM lock for volume {}: actual holder={}, provided source={}, destination={}", - volumeUuid, actualLockHostId, sourceHostId, destHostId); + logger.info("Transferring CLVM lock for volume {}: source={}, destination={}", + volumeUuid, sourceHostId, destHostId); if (hostToDeactivate != null && !hostToDeactivate.equals(destHostId)) { HostVO deactivateHost = _hostDao.findById(hostToDeactivate); if (deactivateHost != null && deactivateHost.getStatus() == Status.Up) { ClvmLockTransferCommand deactivateCmd = new ClvmLockTransferCommand( - ClvmLockTransferCommand.Operation.DEACTIVATE, - lvPath, - volumeUuid - ); + ClvmLockTransferCommand.Operation.DEACTIVATE, lvPath, volumeUuid); Answer deactivateAnswer = _agentMgr.send(hostToDeactivate, deactivateCmd); @@ -314,8 +346,8 @@ public class ClvmPoolManager implements Configurable { logger.warn("Host {} (current lock holder) is down. Will attempt force claim on destination host {}", hostToDeactivate, destHostId); } - } else if (actualLockHostId == null) { - logger.debug("Volume {} has no active lock, will directly activate on destination", volumeUuid); + } else if (hostToDeactivate == null) { + logger.debug("Volume {} has no active lock holder, will directly activate on destination", volumeUuid); } ClvmLockTransferCommand activateCmd = new ClvmLockTransferCommand( @@ -336,7 +368,7 @@ public class ClvmPoolManager implements Configurable { setClvmLockHostId(volumeId, destHostId); logger.info("Successfully transferred CLVM lock for volume {} from host {} to host {}", - volumeUuid, actualLockHostId != null ? actualLockHostId : "none", destHostId); + volumeUuid, sourceHostId != null ? sourceHostId : "none", destHostId); return true; diff --git a/server/src/test/java/com/cloud/storage/ClvmPoolManagerTest.java b/server/src/test/java/com/cloud/storage/ClvmPoolManagerTest.java index c67f2300dd6..240d57315bd 100644 --- a/server/src/test/java/com/cloud/storage/ClvmPoolManagerTest.java +++ b/server/src/test/java/com/cloud/storage/ClvmPoolManagerTest.java @@ -246,6 +246,8 @@ public class ClvmPoolManagerTest { when(pool.getClusterId()).thenReturn(10L); when(pool.getDataCenterId()).thenReturn(1L); when(pool.getName()).thenReturn("test-pool"); + when(pool.getPath()).thenReturn(VG_NAME); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.emptyList()); when(hostDao.findByDataCenterId(1L)).thenReturn(Collections.emptyList()); @@ -261,15 +263,16 @@ public class ClvmPoolManagerTest { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(null); when(pool.getDataCenterId()).thenReturn(1L); - when(pool.getName()).thenReturn("zone-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("zone-pool"); when(pool.getPath()).thenReturn(VG_NAME); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); + HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); when(hostDao.findByDataCenterId(1L)).thenReturn(Collections.singletonList(host)); ClvmLockTransferAnswer answer = new ClvmLockTransferAnswer(null, true, null, "host1", true, false, null); when(agentMgr.send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class))).thenReturn(answer); - when(hostDao.findByName("host1")).thenReturn(host); Long result = clvmPoolManager.queryCurrentLockHolder(VOLUME_ID, VOLUME_UUID, VOLUME_PATH, pool, false); @@ -282,29 +285,33 @@ public class ClvmPoolManagerTest { public void testQueryCurrentLockHolder_SuccessfulQuery() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); + HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); ClvmLockTransferAnswer answer = new ClvmLockTransferAnswer(null, true, null, "host1", true, false, null); when(agentMgr.send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class))).thenReturn(answer); - when(hostDao.findByName("host1")).thenReturn(host); Long result = clvmPoolManager.queryCurrentLockHolder(VOLUME_ID, VOLUME_UUID, VOLUME_PATH, pool, false); Assert.assertEquals(HOST_ID_1, result); verify(agentMgr, times(1)).send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class)); + verify(hostDao, never()).findByName(any()); } @Test public void testQueryCurrentLockHolder_VolumeNotLocked() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); + HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); @@ -320,9 +327,11 @@ public class ClvmPoolManagerTest { public void testQueryCurrentLockHolder_EmptyHostname() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); + HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); @@ -338,28 +347,32 @@ public class ClvmPoolManagerTest { public void testQueryCurrentLockHolder_HostnameNotResolved() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); + HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); ClvmLockTransferAnswer answer = new ClvmLockTransferAnswer(null, true, null, "unknown-host", true, false, null); when(agentMgr.send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class))).thenReturn(answer); - when(hostDao.findByName("unknown-host")).thenReturn(null); Long result = clvmPoolManager.queryCurrentLockHolder(VOLUME_ID, VOLUME_UUID, VOLUME_PATH, pool, false); - Assert.assertNull(result); + Assert.assertEquals(HOST_ID_1, result); + verify(hostDao, never()).findByName(any()); } @Test public void testQueryCurrentLockHolder_QueryFails() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); + HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); @@ -375,9 +388,11 @@ public class ClvmPoolManagerTest { public void testQueryCurrentLockHolder_NullAnswer() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); + HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); @@ -392,9 +407,11 @@ public class ClvmPoolManagerTest { public void testQueryCurrentLockHolder_AgentUnavailableException() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); + HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); @@ -410,9 +427,11 @@ public class ClvmPoolManagerTest { public void testQueryCurrentLockHolder_OperationTimedoutException() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); + HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); @@ -427,51 +446,60 @@ public class ClvmPoolManagerTest { @Test public void testQueryCurrentLockHolder_UpdateDatabase_MatchingValue() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); - when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getClusterId()).thenReturn(10L); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); - when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); + Mockito.lenient().when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); + // DB has correct value, fast path: query HOST_ID_1, returns active VolumeDetailVO detail = new VolumeDetailVO(); detail.setValue(String.valueOf(HOST_ID_1)); when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(detail); + when(hostDao.findById(HOST_ID_1)).thenReturn(host); ClvmLockTransferAnswer answer = new ClvmLockTransferAnswer(null, true, null, "host1", true, false, null); when(agentMgr.send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class))).thenReturn(answer); - when(hostDao.findByName("host1")).thenReturn(host); Long result = clvmPoolManager.queryCurrentLockHolder(VOLUME_ID, VOLUME_UUID, VOLUME_PATH, pool, true); Assert.assertEquals(HOST_ID_1, result); + // Fast path succeeded - no DB write needed, no fan-out verify(volsDetailsDao, never()).update(anyLong(), any()); verify(volsDetailsDao, never()).addDetail(anyLong(), any(), any(), Mockito.anyBoolean()); + verify(hostDao, never()).findByClusterId(anyLong(), any()); } @Test public void testQueryCurrentLockHolder_UpdateDatabase_DifferentValue() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); - HostVO host = createMockHost(HOST_ID_2, "host2", Status.Up, Hypervisor.HypervisorType.KVM); - when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); - + // DB says HOST_ID_1, but actual lock is on HOST_ID_2 + // Fast path: query HOST_ID_1, inactive: fall back to fan-out VolumeDetailVO detail = Mockito.mock(VolumeDetailVO.class); - - detail.setValue(String.valueOf(HOST_ID_1)); + when(detail.getValue()).thenReturn(String.valueOf(HOST_ID_1)); when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(detail); when(detail.getId()).thenReturn(99L); - ClvmLockTransferAnswer answer = new ClvmLockTransferAnswer(null, true, null, "host2", true, false, null); - when(agentMgr.send(eq(HOST_ID_2), any(ClvmLockTransferCommand.class))).thenReturn(answer); - when(hostDao.findByName("host2")).thenReturn(host); + HostVO host1 = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); + HostVO host2 = createMockHost(HOST_ID_2, "host2", Status.Up, Hypervisor.HypervisorType.KVM); + when(hostDao.findById(HOST_ID_1)).thenReturn(host1); + when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Arrays.asList(host1, host2)); + + // HOST_ID_1 reports inactive (fast path miss), HOST_ID_2 reports active (fan-out) + ClvmLockTransferAnswer inactiveAnswer = new ClvmLockTransferAnswer(null, true, null, "host1", false, false, null); + ClvmLockTransferAnswer activeAnswer = new ClvmLockTransferAnswer(null, true, null, "host2", true, false, null); + when(agentMgr.send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class))).thenReturn(inactiveAnswer); + when(agentMgr.send(eq(HOST_ID_2), any(ClvmLockTransferCommand.class))).thenReturn(activeAnswer); Long result = clvmPoolManager.queryCurrentLockHolder(VOLUME_ID, VOLUME_UUID, VOLUME_PATH, pool, true); Assert.assertEquals(HOST_ID_2, result); + // DB should be corrected to HOST_ID_2 verify(detail, times(1)).setValue(String.valueOf(HOST_ID_2)); verify(volsDetailsDao, times(1)).update(eq(99L), eq(detail)); } @@ -480,17 +508,17 @@ public class ClvmPoolManagerTest { public void testQueryCurrentLockHolder_UpdateDatabase_NoExistingDetail() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); + // No DB record, fast path skipped, fan-out finds HOST_ID_1 + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); + HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); - when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); - ClvmLockTransferAnswer answer = new ClvmLockTransferAnswer(null, true, null, "host1", true, false, null); when(agentMgr.send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class))).thenReturn(answer); - when(hostDao.findByName("host1")).thenReturn(host); Long result = clvmPoolManager.queryCurrentLockHolder(VOLUME_ID, VOLUME_UUID, VOLUME_PATH, pool, true); @@ -503,16 +531,20 @@ public class ClvmPoolManagerTest { public void testQueryCurrentLockHolder_UpdateDatabase_RemoveDetailWhenUnlocked() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); - HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); - when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); - + // DB has HOST_ID_1, fast path query returns inactive: fan-out also finds nothing VolumeDetailVO detail = Mockito.mock(VolumeDetailVO.class); when(detail.getId()).thenReturn(99L); + when(detail.getValue()).thenReturn(String.valueOf(HOST_ID_1)); when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(detail); + HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); + when(hostDao.findById(HOST_ID_1)).thenReturn(host); + when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); + + // Both fast path and fan-out report inactive ClvmLockTransferAnswer answer = new ClvmLockTransferAnswer(null, true, null, null, false, false, null); when(agentMgr.send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class))).thenReturn(answer); @@ -526,16 +558,17 @@ public class ClvmPoolManagerTest { public void testQueryCurrentLockHolder_SkipsNonKVMHosts() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); + HostVO xenHost = createMockHost(10L, "xen-host", Status.Up, Hypervisor.HypervisorType.XenServer); HostVO kvmHost = createMockHost(HOST_ID_1, "kvm-host", Status.Up, Hypervisor.HypervisorType.KVM); when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Arrays.asList(xenHost, kvmHost)); ClvmLockTransferAnswer answer = new ClvmLockTransferAnswer(null, true, null, "kvm-host", true, false, null); when(agentMgr.send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class))).thenReturn(answer); - when(hostDao.findByName("kvm-host")).thenReturn(kvmHost); Long result = clvmPoolManager.queryCurrentLockHolder(VOLUME_ID, VOLUME_UUID, VOLUME_PATH, pool, false); @@ -548,16 +581,17 @@ public class ClvmPoolManagerTest { public void testQueryCurrentLockHolder_SkipsDownHosts() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn(VG_NAME); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); + HostVO downHost = createMockHost(10L, "down-host", Status.Down, Hypervisor.HypervisorType.KVM); HostVO upHost = createMockHost(HOST_ID_1, "up-host", Status.Up, Hypervisor.HypervisorType.KVM); when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Arrays.asList(downHost, upHost)); ClvmLockTransferAnswer answer = new ClvmLockTransferAnswer(null, true, null, "up-host", true, false, null); when(agentMgr.send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class))).thenReturn(answer); - when(hostDao.findByName("up-host")).thenReturn(upHost); Long result = clvmPoolManager.queryCurrentLockHolder(VOLUME_ID, VOLUME_UUID, VOLUME_PATH, pool, false); @@ -570,20 +604,114 @@ public class ClvmPoolManagerTest { public void testQueryCurrentLockHolder_PathWithLeadingSlash() throws AgentUnavailableException, OperationTimedoutException { StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); when(pool.getClusterId()).thenReturn(10L); - when(pool.getName()).thenReturn("cluster-pool"); + Mockito.lenient().when(pool.getName()).thenReturn("cluster-pool"); when(pool.getPath()).thenReturn("/" + VG_NAME); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(null); + HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Collections.singletonList(host)); ClvmLockTransferAnswer answer = new ClvmLockTransferAnswer(null, true, null, "host1", true, false, null); when(agentMgr.send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class))).thenReturn(answer); - when(hostDao.findByName("host1")).thenReturn(host); Long result = clvmPoolManager.queryCurrentLockHolder(VOLUME_ID, VOLUME_UUID, VOLUME_PATH, pool, false); Assert.assertEquals(HOST_ID_1, result); } + + /** + * Fast path: DB has the correct host, single query confirms isActive=true. + * No fan-out should occur. + */ + @Test + public void testQueryCurrentLockHolder_FastPath_HitOnDbHost() throws AgentUnavailableException, OperationTimedoutException { + StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); + when(pool.getPath()).thenReturn(VG_NAME); + + VolumeDetailVO detail = new VolumeDetailVO(); + detail.setValue(String.valueOf(HOST_ID_1)); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(detail); + + HostVO host = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); + when(hostDao.findById(HOST_ID_1)).thenReturn(host); + + ClvmLockTransferAnswer activeAnswer = new ClvmLockTransferAnswer(null, true, null, "host1", true, false, null); + when(agentMgr.send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class))).thenReturn(activeAnswer); + + Long result = clvmPoolManager.queryCurrentLockHolder(VOLUME_ID, VOLUME_UUID, VOLUME_PATH, pool, false); + + Assert.assertEquals(HOST_ID_1, result); + // Only one agent call, no cluster host lookup, no fan-out + verify(agentMgr, times(1)).send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class)); + verify(hostDao, never()).findByClusterId(anyLong(), any()); + verify(hostDao, never()).findByDataCenterId(anyLong()); + } + + /** + * Fast path miss: DB has HOST_ID_1 but it's inactive. Fan-out finds HOST_ID_2. + * HOST_ID_1 should NOT be queried again during fan-out. + */ + @Test + public void testQueryCurrentLockHolder_FastPath_MissDbHost_FanOutFindsOther() throws AgentUnavailableException, OperationTimedoutException { + StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); + when(pool.getClusterId()).thenReturn(10L); + when(pool.getPath()).thenReturn(VG_NAME); + + VolumeDetailVO detail = new VolumeDetailVO(); + detail.setValue(String.valueOf(HOST_ID_1)); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(detail); + + HostVO host1 = createMockHost(HOST_ID_1, "host1", Status.Up, Hypervisor.HypervisorType.KVM); + HostVO host2 = createMockHost(HOST_ID_2, "host2", Status.Up, Hypervisor.HypervisorType.KVM); + when(hostDao.findById(HOST_ID_1)).thenReturn(host1); + when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Arrays.asList(host1, host2)); + + // Fast path: HOST_ID_1 inactive + ClvmLockTransferAnswer inactiveAnswer = new ClvmLockTransferAnswer(null, true, null, "host1", false, false, null); + // Fan-out: HOST_ID_2 active (HOST_ID_1 skipped) + ClvmLockTransferAnswer activeAnswer = new ClvmLockTransferAnswer(null, true, null, "host2", true, false, null); + when(agentMgr.send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class))).thenReturn(inactiveAnswer); + when(agentMgr.send(eq(HOST_ID_2), any(ClvmLockTransferCommand.class))).thenReturn(activeAnswer); + + Long result = clvmPoolManager.queryCurrentLockHolder(VOLUME_ID, VOLUME_UUID, VOLUME_PATH, pool, false); + + Assert.assertEquals(HOST_ID_2, result); + // HOST_ID_1 queried once (fast path only), HOST_ID_2 queried once (fan-out) + verify(agentMgr, times(1)).send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class)); + verify(agentMgr, times(1)).send(eq(HOST_ID_2), any(ClvmLockTransferCommand.class)); + } + + /** + * Fast path skip: DB host is DOWN. Fan-out proceeds to all UP hosts. + */ + @Test + public void testQueryCurrentLockHolder_FastPath_DbHostDown_FanOut() throws AgentUnavailableException, OperationTimedoutException { + StoragePoolVO pool = Mockito.mock(StoragePoolVO.class); + when(pool.getClusterId()).thenReturn(10L); + when(pool.getPath()).thenReturn(VG_NAME); + + VolumeDetailVO detail = new VolumeDetailVO(); + detail.setValue(String.valueOf(HOST_ID_1)); + when(volsDetailsDao.findDetail(VOLUME_ID, ClvmPoolManager.CLVM_LOCK_HOST_ID)).thenReturn(detail); + + HostVO downHost = createMockHost(HOST_ID_1, "host1", Status.Down, Hypervisor.HypervisorType.KVM); + HostVO upHost = createMockHost(HOST_ID_2, "host2", Status.Up, Hypervisor.HypervisorType.KVM); + when(hostDao.findById(HOST_ID_1)).thenReturn(downHost); + when(hostDao.findByClusterId(10L, Host.Type.Routing)).thenReturn(Arrays.asList(downHost, upHost)); + + ClvmLockTransferAnswer activeAnswer = new ClvmLockTransferAnswer(null, true, null, "host2", true, false, null); + when(agentMgr.send(eq(HOST_ID_2), any(ClvmLockTransferCommand.class))).thenReturn(activeAnswer); + + Long result = clvmPoolManager.queryCurrentLockHolder(VOLUME_ID, VOLUME_UUID, VOLUME_PATH, pool, false); + + Assert.assertEquals(HOST_ID_2, result); + // No query to the down host at all + verify(agentMgr, never()).send(eq(HOST_ID_1), any(ClvmLockTransferCommand.class)); + verify(agentMgr, times(1)).send(eq(HOST_ID_2), any(ClvmLockTransferCommand.class)); + } + + // Helper method to create mock hosts private HostVO createMockHost(Long id, String name, Status status, Hypervisor.HypervisorType hypervisor) { HostVO host = Mockito.mock(HostVO.class);