From 65fb83035f34743022856033a0a63a157b599916 Mon Sep 17 00:00:00 2001 From: Chiradeep Vittal Date: Tue, 30 Aug 2011 23:28:30 -0700 Subject: [PATCH] Enable monitoring over JMX Add the ability to disable some vms from being scheduled Add ability to trigger ruleset updates from JMX added a few more dangerous JMX operations --- .../agent/api/SecurityIngressRulesCmd.java | 9 ++ scripts/vm/hypervisor/xenserver/vmops | 22 ++- .../security/LocalSecurityGroupWorkQueue.java | 16 ++ .../cloud/network/security/RuleUpdateLog.java | 30 ++++ .../security/SecurityGroupManagerImpl.java | 4 +- .../security/SecurityGroupManagerImpl2.java | 87 +++++++++-- .../security/SecurityGroupManagerMBean.java | 58 +++++++ .../security/SecurityGroupWorkQueue.java | 2 + .../security/SecurityManagerMBeanImpl.java | 145 ++++++++++++++++++ 9 files changed, 351 insertions(+), 22 deletions(-) create mode 100644 server/src/com/cloud/network/security/RuleUpdateLog.java create mode 100644 server/src/com/cloud/network/security/SecurityGroupManagerMBean.java create mode 100644 server/src/com/cloud/network/security/SecurityManagerMBeanImpl.java diff --git a/api/src/com/cloud/agent/api/SecurityIngressRulesCmd.java b/api/src/com/cloud/agent/api/SecurityIngressRulesCmd.java index 97adc3aa74a..0dfd81a2436 100644 --- a/api/src/com/cloud/agent/api/SecurityIngressRulesCmd.java +++ b/api/src/com/cloud/agent/api/SecurityIngressRulesCmd.java @@ -70,6 +70,7 @@ public class SecurityIngressRulesCmd extends Command { String signature; Long seqNum; Long vmId; + Long msId; IpPortAndProto [] ruleSet; public SecurityIngressRulesCmd() { @@ -158,5 +159,13 @@ public class SecurityIngressRulesCmd extends Command { } return count; } + + public void setMsId(long msId) { + this.msId = msId; + } + + public Long getMsId() { + return msId; + } } diff --git a/scripts/vm/hypervisor/xenserver/vmops b/scripts/vm/hypervisor/xenserver/vmops index 7564528e92c..3cea9d533b4 100755 --- a/scripts/vm/hypervisor/xenserver/vmops +++ b/scripts/vm/hypervisor/xenserver/vmops @@ -946,7 +946,7 @@ def check_rule_log_for_vm(vmName, vmID, vmIP, domID, signature, seqno): reprogramChain = False rewriteLog = True - if (seqno > _seqno): + if (int(seqno) > int(_seqno)): if (_signature != signature): reprogramChain = True util.SMlog("Seqno increased from %s to %s: reprogamming "\ @@ -955,7 +955,7 @@ def check_rule_log_for_vm(vmName, vmID, vmIP, domID, signature, seqno): util.SMlog("Seqno increased from %s to %s: but no change "\ "in signature for vm: skip programming ingress "\ "rules %s" % (_seqno, seqno, vmName)) - elif (seqno < _seqno): + elif (int(seqno) < int(_seqno)): util.SMlog("Seqno decreased from %s to %s: ignoring these "\ "ingress rules for vm %s" % (_seqno, seqno, vmName)) rewriteLog = False @@ -1047,22 +1047,28 @@ def network_rules(session, args): if not reprogramDefault and not reprogramChain: util.SMlog("No changes detected between current state and received state") + reason = 'seqno_same_sig_same' if rewriteLog: + reason = 'seqno_increased_sig_same' write_rule_log_for_vm(vm_name, vm_id, vm_ip, domid, signature, seqno) + util.SMlog("Programming network rules for vm %s seqno=%s signature=%s guestIp=%s,"\ + " do nothing, reason=%s" % (vm_name, seqno, signature, vm_ip, reason)) return 'true' - if reprogramDefault: - util.SMlog("Change detected in vmId or vmIp or domId, resetting default rules") - default_network_rules(session, args) - if not reprogramChain: util.SMlog("###Not programming any ingress rules since no changes detected?") return 'true' - + + if reprogramDefault: + util.SMlog("Change detected in vmId or vmIp or domId, resetting default rules") + default_network_rules(session, args) + reason = 'domid_change' + rules = args.pop('rules') lines = rules.split(' ') - util.SMlog(" programming network rules for IP: " + vm_ip + " vmname=" + vm_name) + util.SMlog("Programming network rules for vm %s seqno=%s numrules=%s signature=%s guestIp=%s,"\ + " update iptables, reason=%s" % (vm_name, seqno, len(lines), signature, vm_ip, reason)) util.pread2(['iptables', '-F', vmchain]) for line in lines: diff --git a/server/src/com/cloud/network/security/LocalSecurityGroupWorkQueue.java b/server/src/com/cloud/network/security/LocalSecurityGroupWorkQueue.java index 4b7c3225a33..72676aec212 100644 --- a/server/src/com/cloud/network/security/LocalSecurityGroupWorkQueue.java +++ b/server/src/com/cloud/network/security/LocalSecurityGroupWorkQueue.java @@ -198,6 +198,22 @@ public class LocalSecurityGroupWorkQueue implements SecurityGroupWorkQueue { } } + + + @Override + public List getVmsInQueue() { + List vmIds = new ArrayList(); + _lock.lock(); + try { + Iterator iter = _currentWork.iterator(); + while (iter.hasNext()) { + vmIds.add(iter.next().getInstanceId()); + } + } finally { + _lock.unlock(); + } + return vmIds; + } } diff --git a/server/src/com/cloud/network/security/RuleUpdateLog.java b/server/src/com/cloud/network/security/RuleUpdateLog.java new file mode 100644 index 00000000000..24bcd332c8d --- /dev/null +++ b/server/src/com/cloud/network/security/RuleUpdateLog.java @@ -0,0 +1,30 @@ +/** + * Copyright (C) 2011 Citrix Systems, Inc. All rights reserved. + * + * This software is licensed under the GNU General Public License v3 or later. + * + * It is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ +package com.cloud.network.security; + +import java.util.Set; + +/** + * Keeps track of scheduling and update events for monitoring purposes. + * + */ +public interface RuleUpdateLog { + void logScheduledDetails(Set vmIds); + void logUpdateDetails(Long vmId, Long seqno); + +} diff --git a/server/src/com/cloud/network/security/SecurityGroupManagerImpl.java b/server/src/com/cloud/network/security/SecurityGroupManagerImpl.java index 19aca5bd8e6..1aa6981bb0b 100755 --- a/server/src/com/cloud/network/security/SecurityGroupManagerImpl.java +++ b/server/src/com/cloud/network/security/SecurityGroupManagerImpl.java @@ -148,7 +148,7 @@ public class SecurityGroupManagerImpl implements SecurityGroupManager, SecurityG ScheduledExecutorService _executorPool; ScheduledExecutorService _cleanupExecutor; - private long _serverId; + protected long _serverId; private int _timeBetweenCleanups = TIME_BETWEEN_CLEANUPS; // seconds protected int _numWorkerThreads = WORKER_THREAD_COUNT; @@ -360,7 +360,7 @@ public class SecurityGroupManagerImpl implements SecurityGroupManager, SecurityG return DigestUtils.md5Hex(ruleset); } - protected void handleVmStarted(VMInstanceVO vm) { + public void handleVmStarted(VMInstanceVO vm) { if (vm.getType() != VirtualMachine.Type.User || !isVmSecurityGroupEnabled(vm.getId())) return; List affectedVms = getAffectedVmsForVmStart(vm); diff --git a/server/src/com/cloud/network/security/SecurityGroupManagerImpl2.java b/server/src/com/cloud/network/security/SecurityGroupManagerImpl2.java index cab68b25cc5..43566fc6d57 100644 --- a/server/src/com/cloud/network/security/SecurityGroupManagerImpl2.java +++ b/server/src/com/cloud/network/security/SecurityGroupManagerImpl2.java @@ -17,13 +17,16 @@ */ package com.cloud.network.security; +import java.util.Collections; import java.util.List; import java.util.Map; import java.util.Set; import java.util.TreeMap; import java.util.TreeSet; +import java.util.concurrent.ConcurrentHashMap; import javax.ejb.Local; +import javax.naming.ConfigurationException; import com.cloud.agent.api.SecurityIngressRulesCmd; import com.cloud.agent.manager.Commands; @@ -32,27 +35,22 @@ import com.cloud.network.security.SecurityGroupWork.Step; import com.cloud.uservm.UserVm; import com.cloud.utils.Profiler; import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.utils.mgmt.JmxUtil; import com.cloud.vm.VirtualMachine.State; + /** * Same as the base class -- except it uses the abstracted security group work queue * */ @Local(value={ SecurityGroupManager.class, SecurityGroupService.class }) -public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl { - /*private final String GET_ALLOWED_IPS_QUERY = - "select CONCAT(nics.ip4_address, '/32') from nics INNER JOIN " + - "(select vm_map_2.instance_id from " + - "(select security_ingress_rule.* from security_ingress_rule INNER JOIN " + - " security_group_vm_map ON security_ingress_rule.security_group_id=security_group_vm_map.security_group_id " + - " where security_group_vm_map.instance_id=?) AS ingress_rule_for_vm INNER JOIN " + - " security_group_vm_map AS vm_map_2 ON vm_map_2.security_group_id = ingress_rule_for_vm.allowed_network_id) AS instance " + - " ON nics.instance_id=instance.instance_id where nics.default_nic=1;";*/ - +public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl{ SecurityGroupWorkQueue _workQueue = new LocalSecurityGroupWorkQueue(); - + SecurityManagerMBeanImpl _mBean; WorkerThread[] _workers; + private Set _disabledVms = Collections.newSetFromMap(new ConcurrentHashMap()); + private boolean _schedulerDisabled = false; protected class WorkerThread extends Thread { @@ -87,8 +85,13 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl { if (affectedVms.size() == 0) { return; } + if (_schedulerDisabled) { + s_logger.debug("Security Group Mgr v2: scheduler disabled, doing nothing for " + affectedVms.size() + " vms"); + return; + } Set workItems = new TreeSet(); workItems.addAll(affectedVms); + workItems.removeAll(_disabledVms); if (s_logger.isTraceEnabled()) { s_logger.trace("Security Group Mgr v2: scheduling ruleset updates for " + affectedVms.size() + " vms " + " (unique=" + workItems.size() + "), current queue size=" + _workQueue.size()); @@ -104,6 +107,7 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl { } } int newJobs = _workQueue.submitWorkForVms(workItems); + _mBean.logScheduledDetails(workItems); p.stop(); if (s_logger.isTraceEnabled()){ s_logger.trace("Security Group Mgr v2: done scheduling ruleset updates for " + workItems.size() + " vms: num new jobs=" + @@ -111,6 +115,8 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl { } } + + @Override public boolean start() { @@ -139,6 +145,7 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl { } work.setLogsequenceNumber(rulesetLog.getLogsequence()); sendRulesetUpdates(work); + _mBean.logUpdateDetails(work.getInstanceId(), work.getLogsequenceNumber()); }catch (Exception e) { s_logger.error("Problem during SG work " + work, e); work.setStep(Step.Error); @@ -151,7 +158,7 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl { } - protected void sendRulesetUpdates(SecurityGroupWork work){ + public void sendRulesetUpdates(SecurityGroupWork work){ Long userVmId = work.getInstanceId(); UserVm vm = _userVMDao.findById(userVmId); @@ -165,6 +172,7 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl { SecurityIngressRulesCmd cmd = generateRulesetCmd(vm.getInstanceName(), vm.getPrivateIpAddress(), vm.getPrivateMacAddress(), vm.getId(), null, work.getLogsequenceNumber(), rules); + cmd.setMsId(_serverId); if (s_logger.isTraceEnabled()) { s_logger.trace("SecurityGroupManager v2: sending ruleset update for vm " + vm.getInstanceName() + ": num rules=" + cmd.getRuleSet().length + " num cidrs=" + cmd.getTotalNumCidrs() + " sig=" + cmd.getSignature()); @@ -235,4 +243,59 @@ public class SecurityGroupManagerImpl2 extends SecurityGroupManagerImpl { return allowed; } + + public int getQueueSize() { + return _workQueue.size(); + } + + public SecurityGroupWorkQueue getWorkQueue() { + return _workQueue; + } + + + @Override + public boolean configure(String name, Map params) throws ConfigurationException { + _mBean = new SecurityManagerMBeanImpl(this); + try { + JmxUtil.registerMBean("SecurityGroupManager", "SecurityGroupManagerImpl2", _mBean); + } catch (Exception e){ + s_logger.error("Failed to register MBean", e); + } + return super.configure(name, params); + } + + public void disableSchedulerForVm(Long vmId, boolean disable) { + if (disable) { + _disabledVms.add(vmId); + } else { + _disabledVms.remove(vmId); + } + s_logger.warn("JMX operation: Scheduler state for vm " + vmId + ": new state disabled=" + disable); + + } + + public Long[] getDisabledVmsForScheduler() { + Long [] result = new Long[_disabledVms.size()]; + return _disabledVms.toArray(result ); + } + + public void enableAllVmsForScheduler() { + s_logger.warn("Cleared list of disabled VMs (JMX operation?)"); + _disabledVms.clear(); + } + + public void disableScheduler(boolean disable) { + _schedulerDisabled = disable; + s_logger.warn("JMX operation: Scheduler state changed: new state disabled=" + disable); + } + + public boolean isSchedulerDisabled() { + return _schedulerDisabled; + } + + public void clearWorkQueue() { + _workQueue.clear(); + s_logger.warn("Cleared the work queue (possible JMX operation)"); + } + } diff --git a/server/src/com/cloud/network/security/SecurityGroupManagerMBean.java b/server/src/com/cloud/network/security/SecurityGroupManagerMBean.java new file mode 100644 index 00000000000..448b0baa81a --- /dev/null +++ b/server/src/com/cloud/network/security/SecurityGroupManagerMBean.java @@ -0,0 +1,58 @@ +/** + * Copyright (C) 2011 Citrix Systems, Inc. All rights reserved. + * + * This software is licensed under the GNU General Public License v3 or later. + * + * It is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 3 of the License, or any later version. + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + * + */ +package com.cloud.network.security; + +import java.util.List; +import java.util.Map; +import java.util.Date; + +/** + * Allows JMX access + * + */ +public interface SecurityGroupManagerMBean { + void enableUpdateMonitor(boolean enable); + + void disableSchedulerForVm(Long vmId); + + void enableSchedulerForVm(Long vmId); + + Long[] getDisabledVmsForScheduler(); + + void enableSchedulerForAllVms(); + + Map getScheduledTimestamps(); + + Map getLastUpdateSentTimestamps(); + + int getQueueSize(); + + List getVmsInQueue(); + + void scheduleRulesetUpdateForVm(Long vmId); + + void tryRulesetUpdateForVmBypassSchedulerVeryDangerous(Long vmId, Long seqno); + + void simulateVmStart(Long vmId); + + void disableSchedulerEntirelyVeryDangerous(boolean disable); + + boolean isSchedulerDisabledEntirely(); + + void clearSchedulerQueueVeryDangerous(); +} diff --git a/server/src/com/cloud/network/security/SecurityGroupWorkQueue.java b/server/src/com/cloud/network/security/SecurityGroupWorkQueue.java index 29a592731c2..e6ada37cc71 100644 --- a/server/src/com/cloud/network/security/SecurityGroupWorkQueue.java +++ b/server/src/com/cloud/network/security/SecurityGroupWorkQueue.java @@ -37,4 +37,6 @@ public interface SecurityGroupWorkQueue { int size(); void clear(); + + List getVmsInQueue(); } diff --git a/server/src/com/cloud/network/security/SecurityManagerMBeanImpl.java b/server/src/com/cloud/network/security/SecurityManagerMBeanImpl.java new file mode 100644 index 00000000000..dc8d15196b2 --- /dev/null +++ b/server/src/com/cloud/network/security/SecurityManagerMBeanImpl.java @@ -0,0 +1,145 @@ +package com.cloud.network.security; + +import java.util.ArrayList; +import java.util.Date; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.concurrent.ConcurrentHashMap; + +import javax.management.StandardMBean; + +import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.network.security.LocalSecurityGroupWorkQueue.LocalSecurityGroupWork; +import com.cloud.network.security.SecurityGroupWork.Step; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.VirtualMachine.Type; + +public class SecurityManagerMBeanImpl extends StandardMBean implements SecurityGroupManagerMBean, RuleUpdateLog { + SecurityGroupManagerImpl2 _sgMgr; + boolean _monitoringEnabled = false; + //keep track of last scheduled, last update sent and last seqno sent per vm. Make it available over JMX + Map _scheduleTimestamps = new ConcurrentHashMap(4000, 100, 64); + Map _updateTimestamps = new ConcurrentHashMap(4000, 100, 64); + + + protected SecurityManagerMBeanImpl(SecurityGroupManagerImpl2 securityGroupManager) { + super(SecurityGroupManagerMBean.class, false); + this._sgMgr = securityGroupManager; + } + + + + + @Override + public int getQueueSize() { + return this._sgMgr.getQueueSize(); + } + + @Override + public void logUpdateDetails(Long vmId, Long seqno) { + if (_monitoringEnabled) { + _updateTimestamps.put(vmId, new Date()); + } + + } + + @Override + public void logScheduledDetails(Set vmIds) { + if (_monitoringEnabled) { + for (Long vmId : vmIds) { + _scheduleTimestamps.put(vmId, new Date()); + } + } + } + + @Override + public void enableUpdateMonitor(boolean enable) { + _monitoringEnabled = enable; + if (!enable) { + _updateTimestamps.clear(); + _scheduleTimestamps.clear(); + } + } + + + @Override + public Map getScheduledTimestamps() { + return _scheduleTimestamps; + } + + @Override + public Map getLastUpdateSentTimestamps() { + return _updateTimestamps; + } + + + @Override + public List getVmsInQueue() { + return _sgMgr.getWorkQueue().getVmsInQueue(); + } + + + + + @Override + public void disableSchedulerForVm(Long vmId) { + _sgMgr.disableSchedulerForVm(vmId, true); + + } + + @Override + public void enableSchedulerForVm(Long vmId) { + _sgMgr.disableSchedulerForVm(vmId, false); + + } + + @Override + public Long[] getDisabledVmsForScheduler() { + return _sgMgr.getDisabledVmsForScheduler(); + } + + + @Override + public void enableSchedulerForAllVms() { + _sgMgr.enableAllVmsForScheduler(); + + } + + + @Override + public void scheduleRulesetUpdateForVm(Long vmId) { + List affectedVms = new ArrayList(1); + affectedVms.add(vmId); + _sgMgr.scheduleRulesetUpdateToHosts(affectedVms, true, null); + } + + + @Override + public void tryRulesetUpdateForVmBypassSchedulerVeryDangerous(Long vmId, Long seqno) { + LocalSecurityGroupWork work = new LocalSecurityGroupWorkQueue.LocalSecurityGroupWork(vmId, seqno, Step.Scheduled); + _sgMgr.sendRulesetUpdates(work); + } + + @Override + public void simulateVmStart(Long vmId) { + //all we need is the vmId + VMInstanceVO vm = new VMInstanceVO(vmId, 5, "foo", "foo", Type.User, null, HypervisorType.Any, 8, 1, 1, false, false); + _sgMgr.handleVmStarted(vm); + } + + @Override + public void disableSchedulerEntirelyVeryDangerous(boolean disable) { + _sgMgr.disableScheduler(disable); + } + + @Override + public boolean isSchedulerDisabledEntirely() { + return _sgMgr.isSchedulerDisabled(); + } + + @Override + public void clearSchedulerQueueVeryDangerous() { + _sgMgr.clearWorkQueue(); + } +}