bug 10657: Introducing cluster-level global thresholds for CPU and RAM so that allocation of these resources does not go beyond these thresholds. The reason is that, if the admin needs to perform maintenance, they don't have to add new machines or keep some on standby when the entire zone/pod/cluster is at 100% allocated capacity. Also introducing pool-level global thresholds for allocated storage. There are other changes such as a DB upgrade and the introduction of a transaction.

This commit is contained in:
Nitin 2011-10-29 16:51:37 +05:30
parent 3e5f63f08b
commit 2b370ab535
6 changed files with 123 additions and 29 deletions

View File

@ -410,6 +410,7 @@ public class CapacityManagerImpl implements CapacityManager, StateListener<State
}
@DB
@Override
public void updateCapacityForHost(HostVO host){
// prep the service offerings
@ -489,7 +490,8 @@ public class CapacityManagerImpl implements CapacityManager, StateListener<State
}
}else {
Transaction txn = Transaction.currentTxn();
txn.start();
CapacityVO capacity = new CapacityVO(host.getId(),
host.getDataCenterId(), host.getPodId(), host.getClusterId(), usedMemory,
host.getTotalMemory(),
@ -507,6 +509,7 @@ public class CapacityManagerImpl implements CapacityManager, StateListener<State
CapacityVO.CAPACITY_TYPE_CPU);
capacity.setReservedCapacity(reservedCpu);
_capacityDao.persist(capacity);
txn.commit();
}

View File

@ -101,10 +101,12 @@ public class CapacityDaoImpl extends GenericDaoBase<CapacityVO, Long> implements
SummedCapacitySearch.select("sumReserved", Func.SUM, SummedCapacitySearch.entity().getReservedCapacity());
SummedCapacitySearch.select("sumTotal", Func.SUM, SummedCapacitySearch.entity().getTotalCapacity());
SummedCapacitySearch.select("capacityType", Func.NATIVE, SummedCapacitySearch.entity().getCapacityType());
SummedCapacitySearch.and("dcId", SummedCapacitySearch.entity().getDataCenterId(), Op.EQ);
SummedCapacitySearch.groupBy(SummedCapacitySearch.entity().getCapacityType());
if (zoneId != null){
SummedCapacitySearch.and("dcId", SummedCapacitySearch.entity().getDataCenterId(), Op.EQ);
}
if (podId != null){
SummedCapacitySearch.and("podId", SummedCapacitySearch.entity().getPodId(), Op.EQ);
}
@ -119,7 +121,9 @@ public class CapacityDaoImpl extends GenericDaoBase<CapacityVO, Long> implements
SearchCriteria<SummedCapacity> sc = SummedCapacitySearch.create();
sc.setParameters("dcId", zoneId);
if (zoneId != null){
sc.setParameters("dcId", zoneId);
}
if (podId != null){
sc.setParameters("podId", podId);
}

View File

@ -51,17 +51,18 @@ public enum Config {
CapacityCheckPeriod("Alert", ManagementServer.class, Integer.class, "capacity.check.period", "300000", "The interval in milliseconds between capacity checks", null),
StorageAllocatedCapacityThreshold("Alert", ManagementServer.class, Float.class, "cluster.storage.allocated.capacity.notificationthreshold", "0.75", "Percentage (as a value between 0 and 1) of allocated storage utilization above which alerts will be sent about low storage available.", null),
StorageCapacityThreshold("Alert", ManagementServer.class, Float.class, "cluster.storage.capacity.notificationthreshold", "0.75", "Percentage (as a value between 0 and 1) of storage utilization above which alerts will be sent about low storage available.", null),
CPUCapacityThreshold("Alert", ManagementServer.class, Float.class, "cluster.cpu.capacity.notificationthreshold", "0.75", "Percentage (as a value between 0 and 1) of cpu utilization above which alerts will be sent about low cpu available.", null),
MemoryCapacityThreshold("Alert", ManagementServer.class, Float.class, "cluster.memory.capacity.notificationthreshold", "0.75", "Percentage (as a value between 0 and 1) of memory utilization above which alerts will be sent about low memory available.", null),
CPUCapacityThreshold("Alert", ManagementServer.class, Float.class, "cluster.cpu.allocated.capacity.notificationthreshold", "0.75", "Percentage (as a value between 0 and 1) of cpu utilization above which alerts will be sent about low cpu available.", null),
MemoryCapacityThreshold("Alert", ManagementServer.class, Float.class, "cluster.memory.allocated.capacity.notificationthreshold", "0.75", "Percentage (as a value between 0 and 1) of memory utilization above which alerts will be sent about low memory available.", null),
PublicIpCapacityThreshold("Alert", ManagementServer.class, Float.class, "zone.virtualnetwork.publicip.capacity.notificationthreshold", "0.75", "Percentage (as a value between 0 and 1) of public IP address space utilization above which alerts will be sent.", null),
PrivateIpCapacityThreshold("Alert", ManagementServer.class, Float.class, "pod.privateip.capacity.notificationthreshold", "0.75", "Percentage (as a value between 0 and 1) of private IP address space utilization above which alerts will be sent.", null),
SecondaryStorageCapacityThreshold("Alert", ManagementServer.class, Float.class, "zone.secstorage.capacity.notificationthreshold", "0.75", "Percentage (as a value between 0 and 1) of secondary storage utilization above which alerts will be sent about low storage available.", null),
VlanCapacityThreshold("Alert", ManagementServer.class, Float.class, "zone.vlan.capacity.notificationthreshold", "0.75", "Percentage (as a value between 0 and 1) of Zone Vlan utilization above which alerts will be sent about low number of Zone Vlans.", null),
DirectNetworkPublicIpCapacityThreshold("Alert", ManagementServer.class, Float.class, "zone.directnetwork.publicip.capacity.notificationthreshold", "0.85", "Percentage (as a value between 0 and 1) of Direct Network Public Ip Utilization above which alerts will be sent about low number of direct network public ips.", null),
DirectNetworkPublicIpCapacityThreshold("Alert", ManagementServer.class, Float.class, "zone.directnetwork.publicip.capacity.notificationthreshold", "0.75", "Percentage (as a value between 0 and 1) of Direct Network Public Ip Utilization above which alerts will be sent about low number of direct network public ips.", null),
// Alert threshold for local storage; the description previously duplicated the direct-network public IP text.
LocalStorageCapacityThreshold("Alert", ManagementServer.class, Float.class, "cluster.localStorage.capacity.notificationthreshold", "0.75", "Percentage (as a value between 0 and 1) of local storage utilization above which alerts will be sent about low local storage available.", null),
StorageAllocatedCapacityDisableThreshold("Alert", ManagementServer.class, Float.class, "cluster.storage.allocated.capacity.disablethreshold", "0.85", "Percentage (as a value between 0 and 1) of allocated storage utilization above which allocators will disable using the cluster for low storage available.", null),
CPUCapacityDisableThreshold("Alert", ManagementServer.class, Float.class, "cluster.cpu.capacity.disablethreshold", "0.85", "Percentage (as a value between 0 and 1) of cpu utilization above which allocators will disable using the cluster for low cpu available.", null),
MemoryCapacityDisableThreshold("Alert", ManagementServer.class, Float.class, "cluster.memory.capacity.disablethreshold", "0.85", "Percentage (as a value between 0 and 1) of cpu utilization above which allocators will disable using the cluster for low memory available.", null),
StorageAllocatedCapacityDisableThreshold("Alert", ManagementServer.class, Float.class, "pool.storage.allocated.capacity.disablethreshold", "0.85", "Percentage (as a value between 0 and 1) of allocated storage utilization above which allocators will disable using the cluster for low allocated storage available.", null),
StorageCapacityDisableThreshold("Alert", ManagementServer.class, Float.class, "pool.storage.capacity.disablethreshold", "0.85", "Percentage (as a value between 0 and 1) of storage utilization above which allocators will disable using the cluster for low storage available.", null),
CPUCapacityDisableThreshold("Alert", ManagementServer.class, Float.class, "cluster.cpu.allocated.capacity.disablethreshold", "0.85", "Percentage (as a value between 0 and 1) of cpu utilization above which allocators will disable using the cluster for low cpu available.", null),
// Disable threshold for allocated memory; the description previously said "cpu utilization".
MemoryCapacityDisableThreshold("Alert", ManagementServer.class, Float.class, "cluster.memory.allocated.capacity.disablethreshold", "0.85", "Percentage (as a value between 0 and 1) of memory utilization above which allocators will disable using the cluster for low memory available.", null),
// Storage

View File

@ -30,9 +30,12 @@ import javax.ejb.Local;
import org.apache.log4j.Logger;
import com.cloud.agent.manager.allocator.HostAllocator;
import com.cloud.api.ApiDBUtils;
import com.cloud.capacity.Capacity;
import com.cloud.capacity.CapacityManager;
import com.cloud.capacity.CapacityVO;
import com.cloud.capacity.dao.CapacityDao;
import com.cloud.capacity.dao.CapacityDaoImpl.SummedCapacity;
import com.cloud.configuration.Config;
import com.cloud.configuration.dao.ConfigurationDao;
import com.cloud.dc.ClusterVO;
@ -43,6 +46,7 @@ import com.cloud.dc.Pod;
import com.cloud.dc.dao.ClusterDao;
import com.cloud.dc.dao.DataCenterDao;
import com.cloud.dc.dao.HostPodDao;
import com.cloud.deploy.DeploymentPlanner.ExcludeList;
import com.cloud.exception.InsufficientServerCapacityException;
import com.cloud.host.Host;
import com.cloud.host.HostVO;
@ -338,8 +342,71 @@ public class FirstFitPlanner extends PlannerBase implements DeploymentPlanner {
}
return disabledClusters;
}
/**
 * Builds the capacity-type to disable-threshold lookup used when filtering clusters.
 *
 * The values are read from the configuration DAO on every call instead of being cached,
 * so an admin can change them without restarting the management server.
 *
 * @return a new map holding the CPU and memory disable thresholds, keyed by capacity type
 */
private Map<Short,Float> getCapacityThresholdMap(){
    // 0.85F is the fallback handed to NumbersUtil.parseFloat -- presumably used when the
    // stored value is missing or unparsable; confirm against NumbersUtil.
    final float cpuLimit = NumbersUtil.parseFloat(_configDao.getValue(Config.CPUCapacityDisableThreshold.key()), 0.85F);
    final float memoryLimit = NumbersUtil.parseFloat(_configDao.getValue(Config.MemoryCapacityDisableThreshold.key()), 0.85F);

    Map<Short,Float> thresholdsByType = new HashMap<Short, Float>();
    thresholdsByType.put(Capacity.CAPACITY_TYPE_CPU, cpuLimit);
    thresholdsByType.put(Capacity.CAPACITY_TYPE_MEMORY, memoryLimit);
    return thresholdsByType;
}
/**
 * Lists the capacity types that are checked against a disable threshold.
 *
 * @return a new mutable list containing the CPU and memory capacity types, in that order
 */
private List<Short> getCapacitiesForCheckingThreshold(){
    List<Short> checkedTypes = new ArrayList<Short>();
    for (short type : new short[] { Capacity.CAPACITY_TYPE_CPU, Capacity.CAPACITY_TYPE_MEMORY }){
        checkedTypes.add(type);
    }
    return checkedTypes;
}
/**
 * Removes from {@code clusterList} every cluster that would exceed a disable threshold
 * for CPU or memory if the requested VM were placed in it.
 *
 * For each cluster and each checked capacity type, the summed used + reserved capacity
 * plus the VM's request is divided by the total capacity (CPU total is first multiplied
 * by the CPU overprovisioning factor). A cluster whose ratio is strictly greater than the
 * configured threshold is added to {@code avoid} and dropped from {@code clusterList}.
 * Clusters with no capacity rows, or a total of zero, are left untouched.
 *
 * @param clusterList candidate cluster ids; modified in place
 * @param avoid       exclude list that receives every rejected cluster id
 * @param vmProfile   profile whose service offering supplies the requested CPU and RAM
 */
private void removeClustersCrossingThreshold(List<Long> clusterList, ExcludeList avoid, VirtualMachineProfile<? extends VirtualMachine> vmProfile){
    Map<Short,Float> capacityThresholdMap = getCapacityThresholdMap();
    List<Short> capacityList = getCapacitiesForCheckingThreshold();
    List<Long> clustersCrossingThreshold = new ArrayList<Long>();

    ServiceOffering offering = vmProfile.getServiceOffering();
    int cpu_requested = offering.getCpu() * offering.getSpeed();
    // presumably converts the offering's RAM size from MB to bytes -- confirm units of getRamSize()
    long ram_requested = offering.getRamSize() * 1024L * 1024L;

    // Check every cluster against the disable threshold of each capacity type.
    for (Long clusterId : clusterList){
        for(short capacity : capacityList){
            // Integer.valueOf replaces the deprecated boxing constructor new Integer(...).
            List<SummedCapacity> summedCapacityList = _capacityDao.findCapacityBy(Integer.valueOf(capacity), null, null, clusterId);
            if (summedCapacityList == null || summedCapacityList.isEmpty()){
                continue;
            }
            // Only the first summary row is consulted.
            SummedCapacity summed = summedCapacityList.get(0);
            if (summed.getTotalCapacity() == 0){
                continue; // nothing to divide by
            }

            double used = (double)(summed.getUsedCapacity() + summed.getReservedCapacity());
            double total = summed.getTotalCapacity();
            if (capacity == Capacity.CAPACITY_TYPE_CPU){
                total = total * ApiDBUtils.getCpuOverprovisioningFactor();
                used = used + cpu_requested;
            }else{
                used = used + ram_requested;
            }

            double usedPercentage = used/total;
            float disableThreshold = capacityThresholdMap.get(capacity);
            if (usedPercentage > disableThreshold){
                avoid.addCluster(clusterId);
                clustersCrossingThreshold.add(clusterId);
                if (s_logger.isDebugEnabled()){
                    s_logger.debug("Cannot allocate cluster " + clusterId + " for vm creation since its allocated percentage: " +usedPercentage +
                            " will cross the disable capacity threshold: " + disableThreshold + " for capacity Type : " + capacity + ", skipping this cluster");
                }
                // One crossed threshold is enough to reject the cluster.
                break;
            }
        }
    }

    clusterList.removeAll(clustersCrossingThreshold);
}
private DeployDestination checkClustersforDestination(List<Long> clusterList, VirtualMachineProfile<? extends VirtualMachine> vmProfile,
DeploymentPlan plan, ExcludeList avoid, DataCenter dc, String _allocationAlgorithm){
@ -348,6 +415,8 @@ public class FirstFitPlanner extends PlannerBase implements DeploymentPlanner {
s_logger.trace("ClusterId List to consider: " + clusterList);
}
removeClustersCrossingThreshold(clusterList, avoid, vmProfile);
for(Long clusterId : clusterList){
Cluster clusterVO = _clusterDao.findById(clusterId);
@ -356,7 +425,7 @@ public class FirstFitPlanner extends PlannerBase implements DeploymentPlanner {
avoid.addCluster(clusterVO.getId());
continue;
}
s_logger.debug("Checking resources in Cluster: "+clusterId + " under Pod: "+clusterVO.getPodId());
//search for resources(hosts and storage) under this zone, pod, cluster.
DataCenterDeployment potentialPlan = new DataCenterDeployment(plan.getDataCenterId(), clusterVO.getPodId(), clusterVO.getId(), null, plan.getPoolId());

View File

@ -27,6 +27,7 @@ import javax.naming.ConfigurationException;
import org.apache.log4j.Logger;
import com.cloud.configuration.Config;
import com.cloud.configuration.dao.ConfigurationDao;
import com.cloud.dc.ClusterVO;
import com.cloud.dc.dao.ClusterDao;
@ -80,6 +81,7 @@ public abstract class AbstractStoragePoolAllocator extends AdapterBase implement
Random _rand;
boolean _dontMatter;
double _storageUsedThreshold = 1.0d;
double _storageAllocatedThreshold = 1.0d;
@Override
public boolean configure(String name, Map<String, Object> params) throws ConfigurationException {
@ -92,11 +94,17 @@ public abstract class AbstractStoragePoolAllocator extends AdapterBase implement
_extraBytesPerVolume = 0;
String storageUsedThreshold = configs.get("storage.capacity.threshold");
String storageUsedThreshold = _configDao.getValue(Config.StorageCapacityDisableThreshold.key());
if (storageUsedThreshold != null) {
_storageUsedThreshold = Double.parseDouble(storageUsedThreshold);
}
String storageAllocatedThreshold = _configDao.getValue(Config.StorageAllocatedCapacityDisableThreshold.key());
if (storageAllocatedThreshold != null) {
_storageAllocatedThreshold = Double.parseDouble(storageAllocatedThreshold);
}
_rand = new Random(System.currentTimeMillis());
_dontMatter = Boolean.parseBoolean(configs.get("storage.overwrite.provisioning"));
@ -192,11 +200,11 @@ public abstract class AbstractStoragePoolAllocator extends AdapterBase implement
if (stats != null) {
double usedPercentage = ((double)stats.getByteUsed() / (double)totalSize);
if (s_logger.isDebugEnabled()) {
s_logger.debug("Attempting to look for pool " + pool.getId() + " for storage, totalSize: " + pool.getCapacityBytes() + ", usedBytes: " + stats.getByteUsed() + ", usedPct: " + usedPercentage + ", threshold: " + _storageUsedThreshold);
s_logger.debug("Attempting to look for pool " + pool.getId() + " for storage, totalSize: " + pool.getCapacityBytes() + ", usedBytes: " + stats.getByteUsed() + ", usedPct: " + usedPercentage + ", disable threshold: " + _storageUsedThreshold);
}
if (usedPercentage >= _storageUsedThreshold) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Cannot allocate this pool " + pool.getId() + " for storage since its usage percentage: " +usedPercentage + " has crossed the storage.capacity.threshold: " + _storageUsedThreshold + ", skipping this pool");
s_logger.debug("Cannot allocate this pool " + pool.getId() + " for storage since its usage percentage: " +usedPercentage + " has crossed the pool.storage.capacity.disablethreshold: " + _storageUsedThreshold + ", skipping this pool");
}
return false;
}
@ -250,7 +258,15 @@ public abstract class AbstractStoragePoolAllocator extends AdapterBase implement
}
if (s_logger.isDebugEnabled()) {
s_logger.debug("Attempting to look for pool " + pool.getId() + " for storage, maxSize : " + (pool.getCapacityBytes() * storageOverprovisioningFactor) + ", totalSize : " + totalAllocatedSize + ", askingSize : " + askingSize);
s_logger.debug("Attempting to look for pool " + pool.getId() + " for storage, maxSize : " + (pool.getCapacityBytes() * storageOverprovisioningFactor) + ", totalSize : " + totalAllocatedSize + ", askingSize : " + askingSize + ", allocated disable threshold: " + _storageAllocatedThreshold);
}
double usedPercentage = (totalAllocatedSize + askingSize) / (double)(pool.getCapacityBytes() * storageOverprovisioningFactor);
if (usedPercentage > _storageAllocatedThreshold){
if (s_logger.isDebugEnabled()) {
s_logger.debug("Cannot allocate this pool " + pool.getId() + " for storage since its allocated percentage: " +usedPercentage + " has crossed the allocated pool.storage.allocated.capacity.disablethreshold: " + _storageAllocatedThreshold + ", skipping this pool");
}
return false;
}
if ((pool.getCapacityBytes() * storageOverprovisioningFactor) < (totalAllocatedSize + askingSize)) {

View File

@ -92,20 +92,21 @@ INSERT IGNORE INTO configuration VALUES ('Advanced', 'DEFAULT', 'management-serv
INSERT IGNORE INTO configuration VALUES ('Advanced', 'DEFAULT', 'management-server', 'project.smtp.useAuth', null, 'If true, use SMTP authentication when sending emails');
INSERT IGNORE INTO configuration VALUES ('Advanced', 'DEFAULT', 'management-server', 'project.smtp.username', null, 'Username for SMTP authentication (applies only if project.smtp.useAuth is true)');
INSERT IGNORE INTO configuration VALUES ('Advanced', 'DEFAULT', 'management-server', 'cluster.memory.capacity.disablethreshold' , .85, 'Percentage (as a value between 0 and 1) of memory utilization above which allocators will disable using the cluster for low memory available.');
INSERT IGNORE INTO configuration VALUES ('Advanced', 'DEFAULT', 'management-server', 'cluster.cpu.capacity.disablethreshold' , .85, 'Percentage (as a value between 0 and 1) of cpu utilization above which allocators will disable using the cluster for low cpu available.');
INSERT IGNORE INTO configuration VALUES ('Advanced', 'DEFAULT', 'management-server', 'cluster.storage.allocated.disablethreshold' , .85, 'Percentage (as a value between 0 and 1) of allocated storage utilization above which allocators will disable using the cluster for low allocated storage available.');
INSERT IGNORE INTO configuration VALUES ('Advanced', 'DEFAULT', 'management-server', 'zone.vlan.capacity.notificationthreshold' , .85, 'Percentage (as a value between 0 and 1) of Zone Vlan utilization above which alerts will be sent about low number of Zone Vlans.');
INSERT IGNORE INTO configuration VALUES ('Advanced', 'DEFAULT', 'management-server', 'cluster.localStorage.capacity.notificationthreshold' , .85, 'Percentage (as a value between 0 and 1) of Direct Network Public Ip Utilization above which alerts will be sent about low number of direct network public ips.');
INSERT IGNORE INTO configuration VALUES ('Advanced', 'DEFAULT', 'management-server', 'zone.directnetwork.publicip.capacity.notificationthreshold' , .85, 'Percentage (as a value between 0 and 1) of Direct Network Public Ip Utilization above which alerts will be sent about low number of direct network public ips.');
INSERT IGNORE INTO configuration VALUES ('Advanced', 'DEFAULT', 'management-server', 'zone.secstorage.capacity.notificationthreshold' , .85, 'Percentage (as a value between 0 and 1) of secondary storage utilization above which alerts will be sent about low storage available.');
INSERT IGNORE INTO configuration VALUES ('Alert', 'DEFAULT', 'management-server', 'cluster.memory.allocated.capacity.disablethreshold' , .85, 'Percentage (as a value between 0 and 1) of memory utilization above which allocators will disable using the cluster for low memory available.');
INSERT IGNORE INTO configuration VALUES ('Alert', 'DEFAULT', 'management-server', 'cluster.cpu.allocated.capacity.disablethreshold' , .85, 'Percentage (as a value between 0 and 1) of cpu utilization above which allocators will disable using the cluster for low cpu available.');
INSERT IGNORE INTO configuration VALUES ('Alert', 'DEFAULT', 'management-server', 'pool.storage.allocated.capacity.disablethreshold' , .85, 'Percentage (as a value between 0 and 1) of allocated storage utilization above which allocators will disable using the cluster for low allocated storage available.');
-- Used (not allocated) storage disable threshold; description matches Config.StorageCapacityDisableThreshold.
INSERT IGNORE INTO configuration VALUES ('Alert', 'DEFAULT', 'management-server', 'pool.storage.capacity.disablethreshold' , .85, 'Percentage (as a value between 0 and 1) of storage utilization above which allocators will disable using the cluster for low storage available.');
INSERT IGNORE INTO configuration VALUES ('Alert', 'DEFAULT', 'management-server', 'zone.vlan.capacity.notificationthreshold' , .75, 'Percentage (as a value between 0 and 1) of Zone Vlan utilization above which alerts will be sent about low number of Zone Vlans.');
-- Local storage alert threshold; the description previously duplicated the direct-network public IP text.
INSERT IGNORE INTO configuration VALUES ('Alert', 'DEFAULT', 'management-server', 'cluster.localStorage.capacity.notificationthreshold' , .75, 'Percentage (as a value between 0 and 1) of local storage utilization above which alerts will be sent about low local storage available.');
INSERT IGNORE INTO configuration VALUES ('Alert', 'DEFAULT', 'management-server', 'zone.directnetwork.publicip.capacity.notificationthreshold' , .75, 'Percentage (as a value between 0 and 1) of Direct Network Public Ip Utilization above which alerts will be sent about low number of direct network public ips.');
INSERT IGNORE INTO configuration VALUES ('Alert', 'DEFAULT', 'management-server', 'zone.secstorage.capacity.notificationthreshold' , .75, 'Percentage (as a value between 0 and 1) of secondary storage utilization above which alerts will be sent about low storage available.');
update configuration set name = 'cluster.storage.allocated.capacity.notificationthreshold' where name = 'storage.allocated.capacity.threshold' ;
update configuration set name = 'cluster.storage.capacity.notificationthreshold' where name = 'storage.capacity.threshold' ;
update configuration set name = 'cluster.cpu.capacity.notificationthreshold' where name = 'cpu.capacity.threshold' ;
update configuration set name = 'cluster.memory.capacity.notificationthreshold' where name = 'memory.capacity.threshold' ;
update configuration set name = 'zone.virtualnetwork.publicip.capacity.notificationthreshold' where name = 'public.ip.capacity.threshold' ;
update configuration set name = 'pod.privateip.capacity.notificationthreshold' where name = 'private.ip.capacity.threshold' ;
update configuration set name = 'cluster.storage.allocated.capacity.notificationthreshold' , category = 'Alert' where name = 'storage.allocated.capacity.threshold' ;
update configuration set name = 'cluster.storage.capacity.notificationthreshold' , category = 'Alert' where name = 'storage.capacity.threshold' ;
-- The target names must equal the keys of Config.CPUCapacityThreshold and Config.MemoryCapacityThreshold
-- ('cluster.*.allocated.capacity.notificationthreshold'); otherwise the upgraded values are never read.
update configuration set name = 'cluster.cpu.allocated.capacity.notificationthreshold' , category = 'Alert' where name = 'cpu.capacity.threshold' ;
update configuration set name = 'cluster.memory.allocated.capacity.notificationthreshold' , category = 'Alert' where name = 'memory.capacity.threshold' ;
update configuration set name = 'zone.virtualnetwork.publicip.capacity.notificationthreshold' , category = 'Alert' where name = 'public.ip.capacity.threshold' ;
update configuration set name = 'pod.privateip.capacity.notificationthreshold' , category = 'Alert' where name = 'private.ip.capacity.threshold' ;