bug 13369: recycle roger hung worker VMs in vCenter

This commit is contained in:
Kelven Yang 2012-01-27 15:16:58 -08:00
parent 09dc843a64
commit cb959aa377
5 changed files with 112 additions and 25 deletions

View File

@ -16,6 +16,7 @@ import java.util.Collection;
import java.util.Collections;
import java.util.Comparator;
import java.util.Date;
import java.util.GregorianCalendar;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
@ -254,6 +255,7 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
protected float _memOverprovisioningFactor = 1;
protected boolean _reserveMem = false;
protected boolean _recycleHungWorker = false;
protected DiskControllerType _rootDiskController = DiskControllerType.ide;
protected ManagedObjectReference _morHyperHost;
@ -3121,7 +3123,55 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
VmwareManager mgr = hyperHost.getContext().getStockObject(VmwareManager.CONTEXT_STOCK_NAME);
if(hyperHost.isHyperHostConnected()) {
mgr.gcLeftOverVMs(context);
mgr.gcLeftOverVMs(context);
// Optional sweep (enabled via _recycleHungWorker): destroy worker VMs left behind on this host.
// A VM is treated as a worker VM when it is not a template and its name is a lowercase-hex UUID.
if(_recycleHungWorker) {
    s_logger.info("Scan hung worker VM to recycle");

    // A worker VM still powered on this long after boot is considered hung (hard-coded 10 hours for now)
    final long maxWorkerRunningMillis = 10L * 3600 * 1000;

    ObjectContent[] ocs = hyperHost.getVmPropertiesOnHyperHost(
        new String[] {"name", "config.template", "runtime.powerState", "runtime.bootTime"});
    if(ocs != null) {
        for(ObjectContent oc : ocs) {
            DynamicProperty[] props = oc.getPropSet();
            if(props == null)
                continue;

            String name = null;
            boolean template = false;
            // NOTE(review): a VM whose power state is not reported keeps this default and is
            // therefore handled as a stopped VM below - confirm this is intended
            VirtualMachinePowerState powerState = VirtualMachinePowerState.poweredOff;
            GregorianCalendar bootTime = null;
            for(DynamicProperty prop : props) {
                if(prop.getName().equals("name"))
                    name = prop.getVal().toString();
                else if(prop.getName().equals("config.template"))
                    template = (Boolean)prop.getVal();
                else if(prop.getName().equals("runtime.powerState"))
                    powerState = (VirtualMachinePowerState)prop.getVal();
                else if(prop.getName().equals("runtime.bootTime"))
                    bootTime = (GregorianCalendar)prop.getVal();
            }

            // The "name" property can be missing from the returned property set. Without a name the VM
            // cannot be identified as a worker VM, so skip it instead of failing the whole scan with a
            // NullPointerException.
            if(name == null || template)
                continue;
            if(!name.matches("[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}"))
                continue;

            // recycle stopped worker VM and worker VM that has been running for too long
            boolean recycle = false;
            if(powerState == VirtualMachinePowerState.poweredOff)
                recycle = true;
            else if(bootTime != null && (new Date().getTime() - bootTime.getTimeInMillis() > maxWorkerRunningMillis))
                recycle = true;

            if(recycle) {
                s_logger.info("Recycle pending worker VM: " + name);

                VirtualMachineMO vmMo = new VirtualMachineMO(hyperHost.getContext(), oc.getObj());
                vmMo.powerOff();
                vmMo.destroy();
            }
        }
    }
}
} else {
s_logger.error("Host is no longer connected.");
return null;
@ -3325,7 +3375,7 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
details.put("NativeHA", "true");
}
}
protected HashMap<String, State> sync() {
HashMap<String, State> changes = new HashMap<String, State>();
HashMap<String, State> oldStates = null;
@ -3825,25 +3875,9 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
_password = (String) params.get("password");
_dcId = (String) params.get("zone");
_pod = (String) params.get("pod");
_cluster = (String) params.get("cluster");
_cluster = (String) params.get("cluster");
_guid = (String) params.get("guid");
String value = (String) params.get("cpu.overprovisioning.factor");
if(value != null)
_cpuOverprovisioningFactor = Float.parseFloat(value);
value = (String) params.get("vmware.reserve.cpu");
if(value != null && value.equalsIgnoreCase("true"))
_reserveCpu = true;
value = (String) params.get("mem.overprovisioning.factor");
if(value != null)
_memOverprovisioningFactor = Float.parseFloat(value);
value = (String) params.get("vmware.reserve.mem");
if(value != null && value.equalsIgnoreCase("true"))
_reserveMem = true;
String[] tokens = _guid.split("@");
_vCenterAddress = tokens[1];
_morHyperHost = new ManagedObjectReference();
@ -3871,6 +3905,26 @@ public class VmwareResource implements StoragePoolResource, ServerResource, Vmwa
_privateNetworkVSwitchName = (String) params.get("private.network.vswitch.name");
_publicNetworkVSwitchName = (String) params.get("public.network.vswitch.name");
_guestNetworkVSwitchName = (String) params.get("guest.network.vswitch.name");
// Read optional tuning parameters from the params map. A missing (null) entry leaves the
// corresponding field at its current value.
// The two factors are parsed with Float.parseFloat, so a non-numeric value raises NumberFormatException.
String value = (String) params.get("cpu.overprovisioning.factor");
if(value != null)
_cpuOverprovisioningFactor = Float.parseFloat(value);
// Boolean flags below are only ever switched on here: anything other than "true"
// (case-insensitive) leaves the flag untouched rather than resetting it to false.
value = (String) params.get("vmware.reserve.cpu");
if(value != null && value.equalsIgnoreCase("true"))
_reserveCpu = true;
// NOTE: the key is spelled "wokervm" (sic). It has to stay identical to the key declared by
// Config.VmwareRecycleHungWorker and to the key VmwareManagerImpl puts into the params map;
// correcting the spelling in only one place would silently disable the feature.
value = (String) params.get("vmware.recycle.hung.wokervm");
if(value != null && value.equalsIgnoreCase("true"))
_recycleHungWorker = true;
value = (String) params.get("mem.overprovisioning.factor");
if(value != null)
_memOverprovisioningFactor = Float.parseFloat(value);
value = (String) params.get("vmware.reserve.mem");
if(value != null && value.equalsIgnoreCase("true"))
_reserveMem = true;
value = (String)params.get("vmware.root.disk.controller");
if(value != null && value.equalsIgnoreCase("scsi"))

View File

@ -4,13 +4,17 @@
package com.cloud.hypervisor.vmware.mo;
import java.util.GregorianCalendar;
import org.apache.log4j.Logger;
import com.cloud.hypervisor.vmware.util.VmwareContext;
import com.cloud.serializer.GsonHelper;
import com.cloud.utils.testcase.Log4jEnabledTestCase;
import com.google.gson.Gson;
import com.vmware.vim25.DynamicProperty;
import com.vmware.vim25.ManagedObjectReference;
import com.vmware.vim25.ObjectContent;
import com.vmware.vim25.VirtualMachineConfigSpec;
// This test case needs a particular setup, only used for my own test
@ -22,11 +26,31 @@ public class TestVmwareMO extends Log4jEnabledTestCase {
VmwareContext context = TestVmwareContextFactory.create(
"10.223.80.29", "Administrator", "Suite219");
HostMO hostMo = new HostMO(context, "HostSystem", "host-9");
System.out.println("host Type " + hostMo.getHostType());
Gson gson = GsonHelper.getGsonLogger();
System.out.println(gson.toJson(hostMo.getHostFirewallSystemMO().getFirewallInfo()));
HostMO hostMo = new HostMO(context, "HostSystem", "host-10");
// Fetch name, template flag and boot time for the VMs on the host and print one line per VM.
ObjectContent[] ocs = hostMo.getVmPropertiesOnHyperHost(new String[] {"name", "config.template", "runtime.bootTime"});
if(ocs != null) {
for(ObjectContent oc : ocs) {
DynamicProperty[] props = oc.getPropSet();
if(props != null) {
// Defaults are kept for any property that does not appear in the returned property set
String name = null;
boolean template = false;
GregorianCalendar bootTime = null;
for(DynamicProperty prop : props) {
if(prop.getName().equals("name"))
name = prop.getVal().toString();
else if(prop.getName().equals("config.template"))
template = (Boolean)prop.getVal();
else if(prop.getName().equals("runtime.bootTime"))
bootTime = (GregorianCalendar)prop.getVal();
}
System.out.println("name: " + name + ", template: " + template + ", bootTime: " + bootTime);
}
// Blank separator line after each VM entry (printed even when the entry had no property set)
System.out.println("");
}
}
context.close();
} catch(Exception e) {

View File

@ -259,6 +259,7 @@ public enum Config {
VmwareReserveMem("Advanced", ManagementServer.class, Boolean.class, "vmware.reserve.mem", "false", "Specify whether or not to reserve memory based on memory overprovisioning factor", null),
VmwareRootDiskControllerType("Advanced", ManagementServer.class, String.class, "vmware.root.disk.controller", "ide", "Specify the default disk controller for root volumes, valid values are scsi, ide", null),
VmwareSystemVmNicDeviceType("Advanced", ManagementServer.class, String.class, "vmware.systemvm.nic.device.type", "E1000", "Specify the default network device type for system VMs, valid values are E1000, PCNet32, Vmxnet2, Vmxnet3", null),
VmwareRecycleHungWorker("Advanced", ManagementServer.class, Boolean.class, "vmware.recycle.hung.wokervm", "false", "Specify whether or not to recycle hung worker VMs", null),
// KVM
KvmPublicNetwork("Advanced", ManagementServer.class, String.class, "kvm.public.network.device", null, "Specify the public bridge on host for public network", null),

View File

@ -115,6 +115,7 @@ public class VmwareManagerImpl implements VmwareManager, VmwareStorageMount, Lis
String _serviceConsoleName;
String _managemetPortGroupName;
String _defaultSystemVmNicAdapterType = VirtualEthernetCardType.E1000.toString();
String _recycleHungWorker = "false";
int _additionalPortRangeStart;
int _additionalPortRangeSize;
int _maxHostsPerCluster;
@ -255,6 +256,10 @@ public class VmwareManagerImpl implements VmwareManager, VmwareStorageMount, Lis
if(_reserveMem == null || _reserveMem.isEmpty())
_reserveMem = "false";
_recycleHungWorker = configDao.getValue(Config.VmwareRecycleHungWorker.key());
if(_recycleHungWorker == null || _recycleHungWorker.isEmpty())
_recycleHungWorker = "false";
_rootDiskController = configDao.getValue(Config.VmwareRootDiskControllerType.key());
if(_rootDiskController == null || _rootDiskController.isEmpty())
_rootDiskController = DiskControllerType.ide.toString();
@ -478,6 +483,7 @@ public class VmwareManagerImpl implements VmwareManager, VmwareStorageMount, Lis
params.put("mem.overprovisioning.factor", _memOverprovisioningFactor);
params.put("vmware.reserve.mem", _reserveMem);
params.put("vmware.root.disk.controller", _rootDiskController);
params.put("vmware.recycle.hung.wokervm", _recycleHungWorker);
}
@Override

View File

@ -35,6 +35,8 @@ CREATE TABLE `cloud`.`mshost_peer` (
) ENGINE=InnoDB DEFAULT CHARSET=utf8;
INSERT IGNORE INTO configuration (category, instance, component, name, value, description) VALUES ('Advanced', 'DEFAULT', 'management-server', 'vmware.systemvm.nic.device.type', 'E1000', 'Specify the default network device type for system VMs, valid values are E1000, PCNet32, Vmxnet2, Vmxnet3');
INSERT IGNORE INTO configuration (category, instance, component, name, value, description) VALUES ('Advanced', 'DEFAULT', 'management-server', 'vmware.recycle.hung.wokervm', 'false', 'Specify whether or not to recycle hung worker VMs');
INSERT IGNORE INTO `cloud`.`configuration` VALUES ('Storage', 'DEFAULT', 'StorageManager', 'backup.snapshot.wait', '10800', 'In second, timeout for BackupSnapshotCommand');
INSERT IGNORE INTO `cloud`.`configuration` VALUES ('Storage', 'DEFAULT', 'StorageManager', 'copy.volume.wait', '10800', 'In second, timeout for copy volume command');
INSERT IGNORE INTO `cloud`.`configuration` VALUES ('Storage', 'DEFAULT', 'UserVmManager', 'create.private.template.from.snapshot.wait', '10800', 'In second, timeout for CreatePrivateTemplateFromSnapshotCommand');