From cb959aa377449f695c8061e5b7fca74373b65a26 Mon Sep 17 00:00:00 2001 From: Kelven Yang Date: Fri, 27 Jan 2012 15:16:58 -0800 Subject: [PATCH] bug 13369: recycle roger hung worker VMs in vCenter --- .../vmware/resource/VmwareResource.java | 94 +++++++++++++++---- .../hypervisor/vmware/mo/TestVmwareMO.java | 34 ++++++- .../src/com/cloud/configuration/Config.java | 1 + .../hypervisor/vmware/VmwareManagerImpl.java | 6 ++ setup/db/db/schema-2213to2214.sql | 2 + 5 files changed, 112 insertions(+), 25 deletions(-) diff --git a/core/src/com/cloud/hypervisor/vmware/resource/VmwareResource.java b/core/src/com/cloud/hypervisor/vmware/resource/VmwareResource.java index 8c8850e607f..cc4a09458f9 100755 --- a/core/src/com/cloud/hypervisor/vmware/resource/VmwareResource.java +++ b/core/src/com/cloud/hypervisor/vmware/resource/VmwareResource.java @@ -16,6 +16,7 @@ import java.util.Collection; import java.util.Collections; import java.util.Comparator; import java.util.Date; +import java.util.GregorianCalendar; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -254,6 +255,7 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa protected float _memOverprovisioningFactor = 1; protected boolean _reserveMem = false; + protected boolean _recycleHungWorker = false; protected DiskControllerType _rootDiskController = DiskControllerType.ide; protected ManagedObjectReference _morHyperHost; @@ -3121,7 +3123,55 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa VmwareManager mgr = hyperHost.getContext().getStockObject(VmwareManager.CONTEXT_STOCK_NAME); if(hyperHost.isHyperHostConnected()) { - mgr.gcLeftOverVMs(context); + mgr.gcLeftOverVMs(context); + + if(_recycleHungWorker) { + s_logger.info("Scan hung worker VM to recycle"); + + // GC worker that has been running for too long + ObjectContent[] ocs = hyperHost.getVmPropertiesOnHyperHost( + new String[] {"name", "config.template", "runtime.powerState", "runtime.bootTime"}); + if(ocs != null) { + for(ObjectContent oc : ocs) { + DynamicProperty[] props = oc.getPropSet(); + if(props != null) { + String name = null; + boolean template = false; + VirtualMachinePowerState powerState = VirtualMachinePowerState.poweredOff; + GregorianCalendar bootTime = null; + + for(DynamicProperty prop : props) { + if(prop.getName().equals("name")) + name = prop.getVal().toString(); + else if(prop.getName().equals("config.template")) + template = (Boolean)prop.getVal(); + else if(prop.getName().equals("runtime.powerState")) + powerState = (VirtualMachinePowerState)prop.getVal(); + else if(prop.getName().equals("runtime.bootTime")) + bootTime = (GregorianCalendar)prop.getVal(); + } + + if(!template && name.matches("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}")) { + boolean recycle = false; + + // recycle stopped worker VM and VM that has been running for too long (hard-coded 10 hours for now) + if(powerState == VirtualMachinePowerState.poweredOff) + recycle = true; + else if(bootTime != null && (new Date().getTime() - bootTime.getTimeInMillis() > 10*3600*1000)) + recycle = true; + + if(recycle) { + s_logger.info("Recycle pending worker VM: " + name); + + VirtualMachineMO vmMo = new VirtualMachineMO(hyperHost.getContext(), oc.getObj()); + vmMo.powerOff(); + vmMo.destroy(); + } + } + } + } + } + } } else { s_logger.error("Host is no longer connected."); return null; @@ -3325,7 +3375,7 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa details.put("NativeHA", "true"); } } - + protected HashMap sync() { HashMap changes = new HashMap(); HashMap oldStates = null; @@ -3825,25 +3875,9 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa _password = (String) params.get("password"); _dcId = (String) params.get("zone"); _pod = (String) params.get("pod"); - _cluster = (String) params.get("cluster"); + _cluster = (String) params.get("cluster"); + _guid = (String) params.get("guid"); - - String value = (String) params.get("cpu.overprovisioning.factor"); - if(value != null) - _cpuOverprovisioningFactor = Float.parseFloat(value); - - value = (String) params.get("vmware.reserve.cpu"); - if(value != null && value.equalsIgnoreCase("true")) - _reserveCpu = true; - - value = (String) params.get("mem.overprovisioning.factor"); - if(value != null) - _memOverprovisioningFactor = Float.parseFloat(value); - - value = (String) params.get("vmware.reserve.mem"); - if(value != null && value.equalsIgnoreCase("true")) - _reserveMem = true; - String[] tokens = _guid.split("@"); _vCenterAddress = tokens[1]; _morHyperHost = new ManagedObjectReference(); @@ -3871,6 +3905,26 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa _privateNetworkVSwitchName = (String) params.get("private.network.vswitch.name"); _publicNetworkVSwitchName = (String) params.get("public.network.vswitch.name"); _guestNetworkVSwitchName = (String) params.get("guest.network.vswitch.name"); + + String value = (String) params.get("cpu.overprovisioning.factor"); + if(value != null) + _cpuOverprovisioningFactor = Float.parseFloat(value); + + value = (String) params.get("vmware.reserve.cpu"); + if(value != null && value.equalsIgnoreCase("true")) + _reserveCpu = true; + + value = (String) params.get("vmware.recycle.hung.wokervm"); + if(value != null && value.equalsIgnoreCase("true")) + _recycleHungWorker = true; + + value = (String) params.get("mem.overprovisioning.factor"); + if(value != null) + _memOverprovisioningFactor = Float.parseFloat(value); + + value = (String) params.get("vmware.reserve.mem"); + if(value != null && value.equalsIgnoreCase("true")) + _reserveMem = true; value = (String)params.get("vmware.root.disk.controller"); if(value != null && value.equalsIgnoreCase("scsi")) diff --git a/core/test/com/cloud/hypervisor/vmware/mo/TestVmwareMO.java b/core/test/com/cloud/hypervisor/vmware/mo/TestVmwareMO.java index 125d0592b54..5f9c0c714da 100755 --- a/core/test/com/cloud/hypervisor/vmware/mo/TestVmwareMO.java +++ b/core/test/com/cloud/hypervisor/vmware/mo/TestVmwareMO.java @@ -4,13 +4,17 @@ package com.cloud.hypervisor.vmware.mo; +import java.util.GregorianCalendar; + import org.apache.log4j.Logger; import com.cloud.hypervisor.vmware.util.VmwareContext; import com.cloud.serializer.GsonHelper; import com.cloud.utils.testcase.Log4jEnabledTestCase; import com.google.gson.Gson; +import com.vmware.vim25.DynamicProperty; import com.vmware.vim25.ManagedObjectReference; +import com.vmware.vim25.ObjectContent; import com.vmware.vim25.VirtualMachineConfigSpec; // This test case needs a particular setup, only used for my own test @@ -22,11 +26,31 @@ public class TestVmwareMO extends Log4jEnabledTestCase { VmwareContext context = TestVmwareContextFactory.create( "10.223.80.29", "Administrator", "Suite219"); - HostMO hostMo = new HostMO(context, "HostSystem", "host-9"); - - System.out.println("host Type " + hostMo.getHostType()); - Gson gson = GsonHelper.getGsonLogger(); - System.out.println(gson.toJson(hostMo.getHostFirewallSystemMO().getFirewallInfo())); + HostMO hostMo = new HostMO(context, "HostSystem", "host-10"); + ObjectContent[] ocs = hostMo.getVmPropertiesOnHyperHost(new String[] {"name", "config.template", "runtime.bootTime"}); + if(ocs != null) { + for(ObjectContent oc : ocs) { + DynamicProperty[] props = oc.getPropSet(); + if(props != null) { + String name = null; + boolean template = false; + GregorianCalendar bootTime = null; + + for(DynamicProperty prop : props) { + if(prop.getName().equals("name")) + name = prop.getVal().toString(); + else if(prop.getName().equals("config.template")) + template = (Boolean)prop.getVal(); + else if(prop.getName().equals("runtime.bootTime")) + bootTime = (GregorianCalendar)prop.getVal(); + } + + System.out.println("name: " + name + ", template: " + template + ", bootTime: " + bootTime); + + } + System.out.println(""); + } + } context.close(); } catch(Exception e) { diff --git a/server/src/com/cloud/configuration/Config.java b/server/src/com/cloud/configuration/Config.java index dc280469ef8..e06e384d6b0 100755 --- a/server/src/com/cloud/configuration/Config.java +++ b/server/src/com/cloud/configuration/Config.java @@ -259,6 +259,7 @@ public enum Config { VmwareReserveMem("Advanced", ManagementServer.class, Boolean.class, "vmware.reserve.mem", "false", "Specify whether or not to reserve memory based on memory overprovisioning factor", null), VmwareRootDiskControllerType("Advanced", ManagementServer.class, String.class, "vmware.root.disk.controller", "ide", "Specify the default disk controller for root volumes, valid values are scsi, ide", null), VmwareSystemVmNicDeviceType("Advanced", ManagementServer.class, String.class, "vmware.systemvm.nic.device.type", "E1000", "Specify the default network device type for system VMs, valid values are E1000, PCNet32, Vmxnet2, Vmxnet3", null), + VmwareRecycleHungWorker("Advanced", ManagementServer.class, Boolean.class, "vmware.recycle.hung.wokervm", "false", "Specify whether or not to recycle hung worker VMs", null), // KVM KvmPublicNetwork("Advanced", ManagementServer.class, String.class, "kvm.public.network.device", null, "Specify the public bridge on host for public network", null), diff --git a/server/src/com/cloud/hypervisor/vmware/VmwareManagerImpl.java b/server/src/com/cloud/hypervisor/vmware/VmwareManagerImpl.java index ecd492cda94..157665acc6d 100755 --- a/server/src/com/cloud/hypervisor/vmware/VmwareManagerImpl.java +++ b/server/src/com/cloud/hypervisor/vmware/VmwareManagerImpl.java @@ -115,6 +115,7 @@ public class VmwareManagerImpl implements VmwareManager, VmwareStorageMount, Lis String _serviceConsoleName; String _managemetPortGroupName; String _defaultSystemVmNicAdapterType = VirtualEthernetCardType.E1000.toString(); + String _recycleHungWorker = "false"; int _additionalPortRangeStart; int _additionalPortRangeSize; int _maxHostsPerCluster; @@ -255,6 +256,10 @@ public class VmwareManagerImpl implements VmwareManager, VmwareStorageMount, Lis if(_reserveMem == null || _reserveMem.isEmpty()) _reserveMem = "false"; + _recycleHungWorker = configDao.getValue(Config.VmwareRecycleHungWorker.key()); + if(_recycleHungWorker == null || _recycleHungWorker.isEmpty()) + _recycleHungWorker = "false"; + _rootDiskController = configDao.getValue(Config.VmwareRootDiskControllerType.key()); if(_rootDiskController == null || _rootDiskController.isEmpty()) _rootDiskController = DiskControllerType.ide.toString(); @@ -478,6 +483,7 @@ public class VmwareManagerImpl implements VmwareManager, VmwareStorageMount, Lis params.put("mem.overprovisioning.factor", _memOverprovisioningFactor); params.put("vmware.reserve.mem", _reserveMem); params.put("vmware.root.disk.controller", _rootDiskController); + params.put("vmware.recycle.hung.wokervm", _recycleHungWorker); } @Override diff --git a/setup/db/db/schema-2213to2214.sql b/setup/db/db/schema-2213to2214.sql index 3ce08669123..a4f3f2b918c 100644 --- a/setup/db/db/schema-2213to2214.sql +++ b/setup/db/db/schema-2213to2214.sql @@ -35,6 +35,8 @@ CREATE TABLE `cloud`.`mshost_peer` ( ) ENGINE=InnoDB DEFAULT CHARSET=utf8; INSERT IGNORE INTO configuration (category, instance, component, name, value, description) VALUES ('Advanced', 'DEFAULT', 'management-server', 'vmware.systemvm.nic.device.type', 'E1000', 'Specify the default network device type for system VMs, valid values are E1000, PCNet32, Vmxnet2, Vmxnet3'); +INSERT IGNORE INTO configuration (category, instance, component, name, value, description) VALUES ('Advanced', 'DEFAULT', 'management-server', 'vmware.recycle.hung.wokervm', 'false', 'Specify whether or not to recycle hung worker VMs'); + INSERT IGNORE INTO `cloud`.`configuration` VALUES ('Storage', 'DEFAULT', 'StorageManager', 'backup.snapshot.wait', '10800', 'In second, timeout for BackupSnapshotCommand'); INSERT IGNORE INTO `cloud`.`configuration` VALUES ('Storage', 'DEFAULT', 'StorageManager', 'copy.volume.wait', '10800', 'In second, timeout for copy volume command'); INSERT IGNORE INTO `cloud`.`configuration` VALUES ('Storage', 'DEFAULT', 'UserVmManager', 'create.private.template.from.snapshot.wait', '10800', 'In second, timeout for CreatePrivateTemplateFromSnapshotCommand');