Enable monitoring over JMX

Add the ability to disable some vms from being scheduled

Add ability to trigger ruleset updates from JMX

added a few more dangerous JMX operations
This commit is contained in:
Chiradeep Vittal 2011-08-30 23:28:30 -07:00
parent d81bc5a375
commit 65fb83035f
9 changed files with 351 additions and 22 deletions

View File

@ -70,6 +70,7 @@ public class SecurityIngressRulesCmd extends Command {
String signature;
Long seqNum;
Long vmId;
Long msId;
IpPortAndProto [] ruleSet;
public SecurityIngressRulesCmd() {
@ -158,5 +159,13 @@ public class SecurityIngressRulesCmd extends Command {
}
return count;
}
public void setMsId(long msId) {
this.msId = msId;
}
public Long getMsId() {
return msId;
}
}

View File

@ -946,7 +946,7 @@ def check_rule_log_for_vm(vmName, vmID, vmIP, domID, signature, seqno):
reprogramChain = False
rewriteLog = True
if (seqno > _seqno):
if (int(seqno) > int(_seqno)):
if (_signature != signature):
reprogramChain = True
util.SMlog("Seqno increased from %s to %s: reprogamming "\
@ -955,7 +955,7 @@ def check_rule_log_for_vm(vmName, vmID, vmIP, domID, signature, seqno):
util.SMlog("Seqno increased from %s to %s: but no change "\
"in signature for vm: skip programming ingress "\
"rules %s" % (_seqno, seqno, vmName))
elif (seqno < _seqno):
elif (int(seqno) < int(_seqno)):
util.SMlog("Seqno decreased from %s to %s: ignoring these "\
"ingress rules for vm %s" % (_seqno, seqno, vmName))
rewriteLog = False
@ -1047,22 +1047,28 @@ def network_rules(session, args):
if not reprogramDefault and not reprogramChain:
util.SMlog("No changes detected between current state and received state")
reason = 'seqno_same_sig_same'
if rewriteLog:
reason = 'seqno_increased_sig_same'
write_rule_log_for_vm(vm_name, vm_id, vm_ip, domid, signature, seqno)
util.SMlog("Programming network rules for vm %s seqno=%s signature=%s guestIp=%s,"\
" do nothing, reason=%s" % (vm_name, seqno, signature, vm_ip, reason))
return 'true'
if reprogramDefault:
util.SMlog("Change detected in vmId or vmIp or domId, resetting default rules")
default_network_rules(session, args)
if not reprogramChain:
util.SMlog("###Not programming any ingress rules since no changes detected?")
return 'true'
if reprogramDefault:
util.SMlog("Change detected in vmId or vmIp or domId, resetting default rules")
default_network_rules(session, args)
reason = 'domid_change'
rules = args.pop('rules')
lines = rules.split(' ')
util.SMlog(" programming network rules for IP: " + vm_ip + " vmname=" + vm_name)
util.SMlog("Programming network rules for vm %s seqno=%s numrules=%s signature=%s guestIp=%s,"\
" update iptables, reason=%s" % (vm_name, seqno, len(lines), signature, vm_ip, reason))
util.pread2(['iptables', '-F', vmchain])
for line in lines:

View File

@ -198,6 +198,22 @@ public class LocalSecurityGroupWorkQueue implements SecurityGroupWorkQueue {
}
}
@Override
public List<Long> getVmsInQueue() {
List<Long> vmIds = new ArrayList<Long>();
_lock.lock();
try {
Iterator<SecurityGroupWork> iter = _currentWork.iterator();
while (iter.hasNext()) {
vmIds.add(iter.next().getInstanceId());
}
} finally {
_lock.unlock();
}
return vmIds;
}
}

View File

@ -0,0 +1,30 @@
/**
* Copyright (C) 2011 Citrix Systems, Inc. All rights reserved.
*
* This software is licensed under the GNU General Public License v3 or later.
*
* It is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package com.cloud.network.security;
import java.util.Set;
/**
* Keeps track of scheduling and update events for monitoring purposes.
*
*/
public interface RuleUpdateLog {
void logScheduledDetails(Set<Long> vmIds);
void logUpdateDetails(Long vmId, Long seqno);
}

View File

@ -148,7 +148,7 @@ public class SecurityGroupManagerImpl implements SecurityGroupManager, SecurityG
ScheduledExecutorService _executorPool;
ScheduledExecutorService _cleanupExecutor;
private long _serverId;
protected long _serverId;
private int _timeBetweenCleanups = TIME_BETWEEN_CLEANUPS; // seconds
protected int _numWorkerThreads = WORKER_THREAD_COUNT;
@ -360,7 +360,7 @@ public class SecurityGroupManagerImpl implements SecurityGroupManager, SecurityG
return DigestUtils.md5Hex(ruleset);
}
protected void handleVmStarted(VMInstanceVO vm) {
public void handleVmStarted(VMInstanceVO vm) {
if (vm.getType() != VirtualMachine.Type.User || !isVmSecurityGroupEnabled(vm.getId()))
return;
List<Long> affectedVms = getAffectedVmsForVmStart(vm);

View File

@ -17,13 +17,16 @@
*/
package com.cloud.network.security;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.TreeMap;
import java.util.TreeSet;
import java.util.concurrent.ConcurrentHashMap;
import javax.ejb.Local;
import javax.naming.ConfigurationException;
import com.cloud.agent.api.SecurityIngressRulesCmd;
import com.cloud.agent.manager.Commands;
@ -32,27 +35,22 @@ import com.cloud.network.security.SecurityGroupWork.Step;
import com.cloud.uservm.UserVm;
import com.cloud.utils.Profiler;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.mgmt.JmxUtil;
import com.cloud.vm.VirtualMachine.State;
/**
* Same as the base class -- except it uses the abstracted security group work queue
*
*/
@Local(value={ SecurityGroupManager.class, SecurityGroupService.class })
public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl {
/*private final String GET_ALLOWED_IPS_QUERY =
"select CONCAT(nics.ip4_address, '/32') from nics INNER JOIN " +
"(select vm_map_2.instance_id from " +
"(select security_ingress_rule.* from security_ingress_rule INNER JOIN " +
" security_group_vm_map ON security_ingress_rule.security_group_id=security_group_vm_map.security_group_id " +
" where security_group_vm_map.instance_id=?) AS ingress_rule_for_vm INNER JOIN " +
" security_group_vm_map AS vm_map_2 ON vm_map_2.security_group_id = ingress_rule_for_vm.allowed_network_id) AS instance " +
" ON nics.instance_id=instance.instance_id where nics.default_nic=1;";*/
public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl{
SecurityGroupWorkQueue _workQueue = new LocalSecurityGroupWorkQueue();
SecurityManagerMBeanImpl _mBean;
WorkerThread[] _workers;
private Set<Long> _disabledVms = Collections.newSetFromMap(new ConcurrentHashMap<Long, Boolean>());
private boolean _schedulerDisabled = false;
protected class WorkerThread extends Thread {
@ -87,8 +85,13 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl {
if (affectedVms.size() == 0) {
return;
}
if (_schedulerDisabled) {
s_logger.debug("Security Group Mgr v2: scheduler disabled, doing nothing for " + affectedVms.size() + " vms");
return;
}
Set<Long> workItems = new TreeSet<Long>();
workItems.addAll(affectedVms);
workItems.removeAll(_disabledVms);
if (s_logger.isTraceEnabled()) {
s_logger.trace("Security Group Mgr v2: scheduling ruleset updates for " + affectedVms.size() + " vms " + " (unique=" + workItems.size() + "), current queue size=" + _workQueue.size());
@ -104,6 +107,7 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl {
}
}
int newJobs = _workQueue.submitWorkForVms(workItems);
_mBean.logScheduledDetails(workItems);
p.stop();
if (s_logger.isTraceEnabled()){
s_logger.trace("Security Group Mgr v2: done scheduling ruleset updates for " + workItems.size() + " vms: num new jobs=" +
@ -111,6 +115,8 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl {
}
}
@Override
public boolean start() {
@ -139,6 +145,7 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl {
}
work.setLogsequenceNumber(rulesetLog.getLogsequence());
sendRulesetUpdates(work);
_mBean.logUpdateDetails(work.getInstanceId(), work.getLogsequenceNumber());
}catch (Exception e) {
s_logger.error("Problem during SG work " + work, e);
work.setStep(Step.Error);
@ -151,7 +158,7 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl {
}
protected void sendRulesetUpdates(SecurityGroupWork work){
public void sendRulesetUpdates(SecurityGroupWork work){
Long userVmId = work.getInstanceId();
UserVm vm = _userVMDao.findById(userVmId);
@ -165,6 +172,7 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl {
SecurityIngressRulesCmd cmd = generateRulesetCmd(vm.getInstanceName(), vm.getPrivateIpAddress(),
vm.getPrivateMacAddress(), vm.getId(), null,
work.getLogsequenceNumber(), rules);
cmd.setMsId(_serverId);
if (s_logger.isTraceEnabled()) {
s_logger.trace("SecurityGroupManager v2: sending ruleset update for vm " + vm.getInstanceName() +
": num rules=" + cmd.getRuleSet().length + " num cidrs=" + cmd.getTotalNumCidrs() + " sig=" + cmd.getSignature());
@ -235,4 +243,59 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl {
return allowed;
}
public int getQueueSize() {
return _workQueue.size();
}
public SecurityGroupWorkQueue getWorkQueue() {
return _workQueue;
}
@Override
public boolean configure(String name, Map<String, Object> params) throws ConfigurationException {
_mBean = new SecurityManagerMBeanImpl(this);
try {
JmxUtil.registerMBean("SecurityGroupManager", "SecurityGroupManagerImpl2", _mBean);
} catch (Exception e){
s_logger.error("Failed to register MBean", e);
}
return super.configure(name, params);
}
public void disableSchedulerForVm(Long vmId, boolean disable) {
if (disable) {
_disabledVms.add(vmId);
} else {
_disabledVms.remove(vmId);
}
s_logger.warn("JMX operation: Scheduler state for vm " + vmId + ": new state disabled=" + disable);
}
public Long[] getDisabledVmsForScheduler() {
Long [] result = new Long[_disabledVms.size()];
return _disabledVms.toArray(result );
}
public void enableAllVmsForScheduler() {
s_logger.warn("Cleared list of disabled VMs (JMX operation?)");
_disabledVms.clear();
}
public void disableScheduler(boolean disable) {
_schedulerDisabled = disable;
s_logger.warn("JMX operation: Scheduler state changed: new state disabled=" + disable);
}
public boolean isSchedulerDisabled() {
return _schedulerDisabled;
}
public void clearWorkQueue() {
_workQueue.clear();
s_logger.warn("Cleared the work queue (possible JMX operation)");
}
}

View File

@ -0,0 +1,58 @@
/**
* Copyright (C) 2011 Citrix Systems, Inc. All rights reserved.
*
* This software is licensed under the GNU General Public License v3 or later.
*
* It is free software: you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation, either version 3 of the License, or any later version.
* This program is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with this program. If not, see <http://www.gnu.org/licenses/>.
*
*/
package com.cloud.network.security;
import java.util.List;
import java.util.Map;
import java.util.Date;
/**
* Allows JMX access
*
*/
public interface SecurityGroupManagerMBean {
void enableUpdateMonitor(boolean enable);
void disableSchedulerForVm(Long vmId);
void enableSchedulerForVm(Long vmId);
Long[] getDisabledVmsForScheduler();
void enableSchedulerForAllVms();
Map<Long, Date> getScheduledTimestamps();
Map<Long, Date> getLastUpdateSentTimestamps();
int getQueueSize();
List<Long> getVmsInQueue();
void scheduleRulesetUpdateForVm(Long vmId);
void tryRulesetUpdateForVmBypassSchedulerVeryDangerous(Long vmId, Long seqno);
void simulateVmStart(Long vmId);
void disableSchedulerEntirelyVeryDangerous(boolean disable);
boolean isSchedulerDisabledEntirely();
void clearSchedulerQueueVeryDangerous();
}

View File

@ -37,4 +37,6 @@ public interface SecurityGroupWorkQueue {
int size();
void clear();
List<Long> getVmsInQueue();
}

View File

@ -0,0 +1,145 @@
package com.cloud.network.security;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.Set;
import java.util.concurrent.ConcurrentHashMap;
import javax.management.StandardMBean;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.network.security.LocalSecurityGroupWorkQueue.LocalSecurityGroupWork;
import com.cloud.network.security.SecurityGroupWork.Step;
import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.VirtualMachine.Type;
public class SecurityManagerMBeanImpl extends StandardMBean implements SecurityGroupManagerMBean, RuleUpdateLog {
SecurityGroupManagerImpl2 _sgMgr;
boolean _monitoringEnabled = false;
//keep track of last scheduled, last update sent and last seqno sent per vm. Make it available over JMX
Map<Long, Date> _scheduleTimestamps = new ConcurrentHashMap<Long, Date>(4000, 100, 64);
Map<Long, Date> _updateTimestamps = new ConcurrentHashMap<Long, Date>(4000, 100, 64);
protected SecurityManagerMBeanImpl(SecurityGroupManagerImpl2 securityGroupManager) {
super(SecurityGroupManagerMBean.class, false);
this._sgMgr = securityGroupManager;
}
@Override
public int getQueueSize() {
return this._sgMgr.getQueueSize();
}
@Override
public void logUpdateDetails(Long vmId, Long seqno) {
if (_monitoringEnabled) {
_updateTimestamps.put(vmId, new Date());
}
}
@Override
public void logScheduledDetails(Set<Long> vmIds) {
if (_monitoringEnabled) {
for (Long vmId : vmIds) {
_scheduleTimestamps.put(vmId, new Date());
}
}
}
@Override
public void enableUpdateMonitor(boolean enable) {
_monitoringEnabled = enable;
if (!enable) {
_updateTimestamps.clear();
_scheduleTimestamps.clear();
}
}
@Override
public Map<Long, Date> getScheduledTimestamps() {
return _scheduleTimestamps;
}
@Override
public Map<Long, Date> getLastUpdateSentTimestamps() {
return _updateTimestamps;
}
@Override
public List<Long> getVmsInQueue() {
return _sgMgr.getWorkQueue().getVmsInQueue();
}
@Override
public void disableSchedulerForVm(Long vmId) {
_sgMgr.disableSchedulerForVm(vmId, true);
}
@Override
public void enableSchedulerForVm(Long vmId) {
_sgMgr.disableSchedulerForVm(vmId, false);
}
@Override
public Long[] getDisabledVmsForScheduler() {
return _sgMgr.getDisabledVmsForScheduler();
}
@Override
public void enableSchedulerForAllVms() {
_sgMgr.enableAllVmsForScheduler();
}
@Override
public void scheduleRulesetUpdateForVm(Long vmId) {
List<Long> affectedVms = new ArrayList<Long>(1);
affectedVms.add(vmId);
_sgMgr.scheduleRulesetUpdateToHosts(affectedVms, true, null);
}
@Override
public void tryRulesetUpdateForVmBypassSchedulerVeryDangerous(Long vmId, Long seqno) {
LocalSecurityGroupWork work = new LocalSecurityGroupWorkQueue.LocalSecurityGroupWork(vmId, seqno, Step.Scheduled);
_sgMgr.sendRulesetUpdates(work);
}
@Override
public void simulateVmStart(Long vmId) {
//all we need is the vmId
VMInstanceVO vm = new VMInstanceVO(vmId, 5, "foo", "foo", Type.User, null, HypervisorType.Any, 8, 1, 1, false, false);
_sgMgr.handleVmStarted(vm);
}
@Override
public void disableSchedulerEntirelyVeryDangerous(boolean disable) {
_sgMgr.disableScheduler(disable);
}
@Override
public boolean isSchedulerDisabledEntirely() {
return _sgMgr.isSchedulerDisabled();
}
@Override
public void clearSchedulerQueueVeryDangerous() {
_sgMgr.clearWorkQueue();
}
}