bug 12348: fixed the per-cluster VM lookup, which was not returning VMs in the Starting state

This commit is contained in:
Abhinandan Prateek 2011-12-13 12:20:48 +05:30
parent 35e2640019
commit a703315d34
7 changed files with 112 additions and 38 deletions

View File

@ -27,6 +27,7 @@ public class ClusterSyncAnswer extends Answer {
private HashMap<String, Pair<String, State>> _newStates;
private HashMap<String, Pair<String, State>> _allStates;
private int _type = -1; // 0 for full, 1 for delta
private boolean _isExecuted=false;
public static final int FULL_SYNC=0;
public static final int DELTA_SYNC=1;
@ -38,6 +39,17 @@ public class ClusterSyncAnswer extends Answer {
_type = -1;
}
// this is here because a cron command answer is being sent twice
// AgentAttache.processAnswers
// AgentManagerImpl.notifyAnswersToMonitors
public boolean isExceuted(){
return _isExecuted;
}
public void setExecuted(){
_isExecuted = true;
}
public ClusterSyncAnswer(long clusterId, HashMap<String, Pair<String, State>> newStates){
_clusterId = clusterId;

View File

@ -6573,7 +6573,6 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
protected HashMap<String, Pair<String, State>> fullClusterSync(Connection conn) {
XenServerPoolVms vms = new XenServerPoolVms();
try {
Host lhost = Host.getByUuid(conn, _host.uuid);
Map<VM, VM.Record> vm_map = VM.getAllRecords(conn); //USE THIS TO GET ALL VMS FROM A CLUSTER
for (VM.Record record: vm_map.values()) {
if (record.isControlDomain || record.isASnapshot || record.isATemplate) {
@ -6627,6 +6626,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
//check if host is changed
if (host_uuid != null && oldState != null){
if (!host_uuid.equals(oldState.first()) && newState != State.Stopped && newState != State.Stopping){
s_logger.warn("Detecting a change in host for " + vm);
changes.put(vm, new Pair<String, State>(host_uuid, newState));
s_vms.put(_cluster, host_uuid, vm, newState);
continue;
@ -6636,7 +6636,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
if (newState == State.Stopped && oldState != null && oldState.second() != State.Stopping && oldState.second() != State.Stopped) {
newState = getRealPowerState(conn, vm);
}
if (s_logger.isTraceEnabled()) {
s_logger.trace("VM " + vm + ": xen has state " + newState + " and we have state " + (oldState != null ? oldState.toString() : "null"));
}
@ -6682,7 +6682,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
final String vm = entry.getKey();
final State oldState = entry.getValue().second();
String host_uuid = entry.getValue().first();
if (s_logger.isTraceEnabled()) {
s_logger.trace("VM " + vm + " is now missing from xen so reporting stopped");
}
@ -6697,12 +6697,11 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
} else if (oldState == State.Migrating) {
s_logger.warn("Ignoring VM " + vm + " in migrating state.");
} else {
//State newState = State.Stopped;
//changes.put(vm, new Pair<String, State>(host_uuid, newState));
State newState = State.Stopped;
changes.put(vm, new Pair<String, State>(host_uuid, newState));
}
}
}
return changes;
}

View File

@ -13,8 +13,9 @@ import com.cloud.vm.VirtualMachine.State;
public class XenServerPoolVms {
private static final Logger s_logger = Logger.getLogger(XenServerPoolVms.class);
HashMap<String/* clusterId */, HashMap<String/* vm name */, Pair<String/* host uuid */, State/* vm state */>>> _cluster_vms =
private HashMap<String/* clusterId */, HashMap<String/* vm name */, Pair<String/* host uuid */, State/* vm state */>>> _cluster_vms =
new HashMap<String, HashMap<String, Pair<String, State>>>();
private long _last_sync_time=0;
public HashMap<String, Pair<String, State>> getClusterVmState(String clusterId){
HashMap<String, Pair<String, State>> _vms= _cluster_vms.get(clusterId);
@ -64,10 +65,9 @@ public class XenServerPoolVms {
HashMap<String, Pair<String, State>> vms= getClusterVmState(clusterId);
return vms.size();
}
public static void main(String args[]){
XenServerPoolVms vms = new XenServerPoolVms();
public void initSyncTime(){
_last_sync_time = System.currentTimeMillis();
}
@Override

View File

@ -159,8 +159,7 @@ public enum Config {
PingInterval("Advanced", AgentManager.class, Integer.class, "ping.interval", "60", "Ping interval in seconds", null),
PingTimeout("Advanced", AgentManager.class, Float.class, "ping.timeout", "2.5", "Multiplier to ping.interval before announcing an agent has timed out", null),
ClusterDeltaSyncInterval("Advanced", AgentManager.class, Integer.class, "sync.interval", "60", "Cluster Delta sync interval in seconds", null),
ClusterFullSyncSkipSteps("Advanced", AgentManager.class, Integer.class,
"skip.steps", "525600", "Cluster full sync skip steps count", null),
ClusterFullSyncSkipSteps("Advanced", AgentManager.class, Integer.class, "skip.steps", "60", "Cluster full sync skip steps count", null),
Port("Advanced", AgentManager.class, Integer.class, "port", "8250", "Port to listen on for agent connection.", null),
RouterCpuMHz("Advanced", NetworkManager.class, Integer.class, "router.cpu.mhz", String.valueOf(VirtualNetworkApplianceManager.DEFAULT_ROUTER_CPU_MHZ), "Default CPU speed (MHz) for router VM.", null),
RestartRetryInterval("Advanced", HighAvailabilityManager.class, Integer.class, "restart.retry.interval", "600", "Time (in seconds) between retries to restart a vm", null),

View File

@ -139,6 +139,7 @@ import com.cloud.utils.component.Inject;
import com.cloud.utils.concurrency.NamedThreadFactory;
import com.cloud.utils.db.DB;
import com.cloud.utils.db.GlobalLock;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.Transaction;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.exception.ExecutionException;
@ -1608,6 +1609,27 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
commands.addCommand(command);
}
}
final List<? extends VMInstanceVO> vmsz = _vmDao.listByHostId(hostId);
s_logger.debug("Found " + vmsz.size() + " VMs for host " + hostId);
for (VMInstanceVO vm : vmsz) {
AgentVmInfo info = infos.remove(vm.getId());
VMInstanceVO castedVm = null;
if (info == null) {
info = new AgentVmInfo(vm.getInstanceName(), getVmGuru(vm), vm, State.Stopped);
castedVm = info.guru.findById(vm.getId());
} else {
castedVm = info.vm;
}
HypervisorGuru hvGuru = _hvGuruMgr.getGuru(castedVm.getHypervisorType());
Command command = compareState(hostId, castedVm, info, true, hvGuru.trackVmHostChange());
if (command != null) {
commands.addCommand(command);
}
}
for (final AgentVmInfo left : infos.values()) {
boolean found = false;
@ -1629,7 +1651,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
}
}
if ( ! found ) {
s_logger.warn("Stopping a VM that we have no record of: " + left.name);
s_logger.warn("Stopping a VM that we have no record of <fullHostSync>: " + left.name);
commands.addCommand(cleanup(left.name));
}
}
@ -1667,50 +1689,70 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
// Delta sync: converts the reported VM states into AgentVmInfo entries and, for each
// entry, either compares the state of a known VM or prepares a cleanup for a VM that
// has no matching record.
// NOTE(review): this span is a diff rendering. Both the Commands-returning and the void
// signature are present, together with the statements that belong to each, so it does
// not compile as a single method.
public Commands deltaSync(Map<String, Pair<String, State>> newStates) {
public void deltaSync(Map<String, Pair<String, State>> newStates) {
Map<Long, AgentVmInfo> states = convertToInfos(newStates);
Commands commands = new Commands(OnError.Continue);
for (Map.Entry<Long, AgentVmInfo> entry : states.entrySet()) {
AgentVmInfo info = entry.getValue();
VMInstanceVO vm = info.vm;
Command command = null;
if (vm != null) {
String hostGuid = info.getHostUuid();
Host host = _resourceMgr.findHostByGuid(hostGuid);
Host host = _resourceMgr.findHostByGuid(info.getHostUuid());
// NOTE(review): host is dereferenced here without a null check, while the send path
// below guards host != null for the same lookup. Confirm findHostByGuid cannot return
// null at this point.
long hId = host.getId();
HypervisorGuru hvGuru = _hvGuruMgr.getGuru(vm.getHypervisorType());
command = compareState(hId, vm, info, false, hvGuru.trackVmHostChange());
} else {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Cleaning up a VM that is no longer found: " + info.name);
s_logger.debug("Cleaning up a VM that is no longer found <deltaSync>: " + info.name);
}
command = cleanup(info.name);
}
if (command != null) {
commands.addCommand(command);
if (command != null){
try {
Host host = _resourceMgr.findHostByGuid(info.getHostUuid());
if (host != null){
// NOTE(review): this sends cleanup(info.name), not the `command` computed above.
// Confirm that is intended when compareState produced the command.
Answer answer = _agentMgr.send(host.getId(), cleanup(info.name));
if (!answer.getResult()) {
s_logger.warn("Unable to stop a VM due to " + answer.getDetails());
}
}
} catch (Exception e) {
// Best effort: the failure is only logged (message text only) and the loop moves on
// to the next entry.
s_logger.warn("Unable to stop a VM due to " + e.getMessage());
}
}
}
return commands;
}
// Full sync: starts from every VM reported for the cluster, removes the ones found in
// the database (listByClusterId, then listStartingByClusterId), and sends a cleanup for
// each VM that is left over.
// NOTE(review): this span is a diff rendering. Both the Commands-returning and the void
// signature are present, together with the statements that belong to each, so it does
// not compile as a single method.
public Commands fullSync(final long clusterId, Map<String, Pair<String, State>> newStates) {
Commands commands = new Commands(OnError.Continue);
public void fullSync(final long clusterId, Map<String, Pair<String, State>> newStates) {
Map<Long, AgentVmInfo> infos = convertToInfos(newStates);
// NOTE(review): hId is not referenced anywhere else in this span.
long hId = 0;
final List<VMInstanceVO> vms = _vmDao.listByClusterId(clusterId);
List<VMInstanceVO> vms = _vmDao.listByClusterId(clusterId);
for (VMInstanceVO vm : vms) {
AgentVmInfo info = infos.remove(vm.getId());
// Removed, Destroyed and Expunging VMs are skipped before the remove call, so a
// reported entry for such a VM stays in infos and is handled by the loop at the end.
if (vm.isRemoved() || vm.getState() == State.Destroyed || vm.getState() == State.Expunging) continue;
infos.remove(vm.getId());
}
// some VMs may be starting and will have last host id null
vms = _vmDao.listStartingByClusterId(clusterId);
for (VMInstanceVO vm : vms) {
if (vm.isRemoved() || vm.getState() == State.Destroyed || vm.getState() == State.Expunging) continue;
infos.remove(vm.getId());
}
// Whatever remains in infos was reported but not matched by either query above.
for (final AgentVmInfo left : infos.values()) {
s_logger.warn("Stopping a VM that we have no record of: " + left.name);
commands.addCommand(cleanup(left.name));
try {
Host host = _resourceMgr.findHostByGuid(left.getHostUuid());
// When no host is found for the reported uuid, nothing is sent and nothing is logged.
if (host != null){
s_logger.warn("Stopping a VM which we do not have any record of " + left.name);
Answer answer = _agentMgr.send(host.getId(), cleanup(left.name));
if (!answer.getResult()) {
s_logger.warn("Unable to stop a VM due to " + answer.getDetails());
}
}
} catch (Exception e) {
// Best effort: the failure is only logged (message text only) and the loop moves on
// to the next leftover VM.
s_logger.warn("Unable to stop a VM due to " + e.getMessage());
}
}
return commands;
}
@ -2040,11 +2082,14 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
for (final Answer answer : answers) {
if (answer instanceof ClusterSyncAnswer) {
ClusterSyncAnswer hs = (ClusterSyncAnswer) answer;
if (hs.isFull()) {
deltaSync(hs.getNewStates());
fullSync(hs.getClusterId(), hs.getAllStates());
} else if (hs.isDelta()){
deltaSync(hs.getNewStates());
if (!hs.isExceuted()){
if (hs.isFull()) {
deltaSync(hs.getNewStates());
fullSync(hs.getClusterId(), hs.getAllStates());
} else if (hs.isDelta()){
deltaSync(hs.getNewStates());
}
hs.setExecuted();
}
} else if (!answer.getResult()) {
s_logger.warn("Cleanup failed due to " + answer.getDetails());

View File

@ -83,7 +83,9 @@ public interface VMInstanceDao extends GenericDao<VMInstanceVO, Long>, StateDao<
List<VMInstanceVO> listByAccountId(long accountId);
public Long countAllocatedVirtualRoutersForAccount(long accountId);
List<VMInstanceVO> listByClusterId(long clusterId);
List<VMInstanceVO> listByClusterId(long clusterId); // does not return VMs that are still starting
List<VMInstanceVO> listStartingByClusterId(long clusterId); // returns all VMs on this cluster, including those that are starting
List<VMInstanceVO> listVmsMigratingFromHost(Long hostId);
public Long countRunningByHostId(long hostId);

View File

@ -59,6 +59,7 @@ public class VMInstanceDaoImpl extends GenericDaoBase<VMInstanceVO, Long> implem
public static final Logger s_logger = Logger.getLogger(VMInstanceDaoImpl.class);
protected final SearchBuilder<VMInstanceVO> VMClusterSearch;
protected final SearchBuilder<VMInstanceVO> StartingVMClusterSearch;
protected final SearchBuilder<VMInstanceVO> IdStatesSearch;
protected final SearchBuilder<VMInstanceVO> AllFieldsSearch;
protected final SearchBuilder<VMInstanceVO> ZoneTemplateNonExpungedSearch;
@ -87,6 +88,7 @@ public class VMInstanceDaoImpl extends GenericDaoBase<VMInstanceVO, Long> implem
" GROUP BY host.id ORDER BY 2 ASC ";
protected final HostDaoImpl _hostDao = ComponentLocator.inject(HostDaoImpl.class);
protected VMInstanceDaoImpl() {
IdStatesSearch = createSearchBuilder();
IdStatesSearch.and("id", IdStatesSearch.entity().getId(), Op.EQ);
@ -99,6 +101,14 @@ public class VMInstanceDaoImpl extends GenericDaoBase<VMInstanceVO, Long> implem
hostSearch.and("clusterId", hostSearch.entity().getClusterId(), SearchCriteria.Op.EQ);
VMClusterSearch.done();
StartingVMClusterSearch = createSearchBuilder();
SearchBuilder<HostVO> hostSearch1 = _hostDao.createSearchBuilder();
StartingVMClusterSearch.join("hostSearch1", hostSearch1, hostSearch1.entity().getId(), StartingVMClusterSearch.entity().getHostId(), JoinType.INNER);
hostSearch1.and("clusterId", hostSearch1.entity().getClusterId(), SearchCriteria.Op.EQ);
StartingVMClusterSearch.done();
AllFieldsSearch = createSearchBuilder();
AllFieldsSearch.and("host", AllFieldsSearch.entity().getHostId(), Op.EQ);
AllFieldsSearch.and("lastHost", AllFieldsSearch.entity().getLastHostId(), Op.EQ);
@ -212,10 +222,17 @@ public class VMInstanceDaoImpl extends GenericDaoBase<VMInstanceVO, Long> implem
// Runs VMClusterSearch with the cluster id bound to the "clusterId" parameter of the
// "hostSearch" join and returns the matching VM instances.
public List<VMInstanceVO> listByClusterId(long clusterId) {
    final SearchCriteria<VMInstanceVO> criteria = VMClusterSearch.create();
    criteria.setJoinParameters("hostSearch", "clusterId", clusterId);
    final List<VMInstanceVO> matches = listBy(criteria);
    return matches;
}
// Runs StartingVMClusterSearch with the cluster id bound to the "clusterId" parameter
// of the "hostSearch1" join and returns the matching VM instances.
@Override
public List<VMInstanceVO> listStartingByClusterId(long clusterId) {
    final SearchCriteria<VMInstanceVO> criteria = StartingVMClusterSearch.create();
    criteria.setJoinParameters("hostSearch1", "clusterId", clusterId);
    final List<VMInstanceVO> matches = listBy(criteria);
    return matches;
}
@Override
public List<VMInstanceVO> listByZoneIdAndType(long zoneId, VirtualMachine.Type type) {
SearchCriteria<VMInstanceVO> sc = AllFieldsSearch.create();