Bug 11933 - Unable to add Primary Storage (OCFS2) to a OVM Cluster

Bug 11948 - Cannot add a new OVM host to an existing OVM cluster
Bug 11699 - OVM - add host previously used in other OVM cluster > host went to alert state> host cleanup procedure needed

status 11933: resolve fixed
status 11948: resolve fixed
status 11699: resolve fixed

Replace the ovs-agent OCFS2 functions with our own implementation.
ovs-agent's implementation doesn't check for error conditions; it only works if everything is already correct.
We also add a check for a previously used host that was not cleaned up; the clean-up procedure is printed in the error message.

reviewed-by: edison
This commit is contained in:
frank 2011-11-10 14:46:56 -08:00
parent b53dc42d0a
commit 991ac4835a
4 changed files with 105 additions and 60 deletions

View File

@ -13,7 +13,8 @@ from OvmObjectModule import *
import types
import logging
import popen2
from OvmFaultConstants import toErrCode, dispatchErrCode, NoVmFoundException
import subprocess
from OvmFaultConstants import toErrCode, dispatchErrCode, NoVmFoundException, ShellExceutedFailedException
from xmlrpclib import Fault as XmlRpcFault
from OVSCommons import *
from OvmLoggerModule import OvmLogger
@ -26,6 +27,7 @@ HEARTBEAT_DIR='heart_beat'
ETC_HOSTS='/etc/hosts'
HOSTNAME_FILE='/etc/sysconfig/network'
OWNER_FILE_PREFIX='host_'
OCFS2_CONF='/etc/ocfs2/cluster.conf'
logger = OvmLogger('OvmCommon')
@ -93,10 +95,18 @@ def BytesToM(bytes):
def BytesToG(bytes):
    """Convert a byte count to gibibytes by dividing by 1024**3.

    The plain ``/`` operator is used, so integer inputs follow the
    interpreter's rules for integer division.
    """
    bytesPerGiB = 1024 * 1024 * 1024
    return bytes / bytesPerGiB
def runCmd(cmds):
    """Run a command line through the shell and return its standard output.

    cmds -- the command line handed to subprocess.Popen with shell=True.

    Raises ShellExceutedFailedException, carrying the captured stderr and
    the exit status, when the command exits with a non-zero status.
    """
    proc = subprocess.Popen(cmds, shell=True, stdout=subprocess.PIPE, stderr=subprocess.PIPE)
    # communicate() reads both pipes to the end and waits for the process
    out, err = proc.communicate()
    exitCode = proc.returncode
    if exitCode == 0:
        return out
    raise ShellExceutedFailedException(err, exitCode)
def doCmd(lst):
    """Join the fragments into one command line, run it and return its stdout.

    lst -- sequence of command fragments; each one is converted with str()
           and the pieces are joined with single spaces.

    The command line and its output are logged at debug level. Exceptions
    raised by runCmd propagate to the caller unchanged.
    """
    cmdStr = ' '.join([str(i) for i in lst])
    logger.debug(doCmd, cmdStr)
    # Execute exactly once through runCmd. The older run_cmd(cmds) call and
    # its duplicate debug line are dropped so the command is not run twice.
    res = runCmd(cmdStr)
    logger.debug(doCmd, 'result:' + res)
    return res

View File

@ -4,10 +4,20 @@ OvmVmErrCodeStub = 2000
OvmStoragePoolErrCodeStub = 3000
OvmNetworkErrCodeStub = 4000
OvmVolumeErrCodeStub = 5000
OvmOCFS2ErrCodeStub = 6000
class NoVmFoundException(Exception):
    """Exception subclass that adds no state or behaviour of its own."""
class ShellExceutedFailedException(Exception):
    """Error carrying the stderr text and return code of a failed command.

    The exception message is "<err>, return code:<code>"; the raw values
    stay available on the ``stderr`` and ``errCode`` attributes.
    """
    # class-level defaults; __init__ overrides both on every instance
    stderr = ''
    errCode = -1000

    def __init__(self, err, code):
        message = "%s, return code:%s" % (err, code)
        Exception.__init__(self, message)
        self.errCode = code
        self.stderr = err
errCode = {
# OvmDispatch is not class, these error codes are reserved
"OvmDispatch.InvalidCallMethodFormat":OvmDispatcherStub+1,
@ -55,6 +65,8 @@ errCode = {
"OvmVolume.createDataDisk":OvmVolumeErrCodeStub+1,
"OvmVolume.createFromTemplate":OvmVolumeErrCodeStub+2,
"OvmVolume.destroy":OvmVolumeErrCodeStub+3,
"OvmOCFS2._addNode":OvmOCFS2ErrCodeStub+1,
}

View File

@ -0,0 +1,62 @@
from OvmCommonModule import *
logger = OvmLogger('OvmOCFS2')
class OvmOCFS2(OvmObject):
    """Helpers that configure and start an O2CB/OCFS2 cluster via shell commands."""

    def _prepareConf(self, cluster):
        """Write a fresh OCFS2_CONF holding only a cluster stanza for `cluster`.

        The parent directory of OCFS2_CONF is created when it is missing and
        any existing file is overwritten.
        """
        conf = '''cluster:
node_count = 0
name = %s
'''%cluster
        confDir = dirname(OCFS2_CONF)
        if not isdir(confDir):
            os.makedirs(confDir)
        fd = open(OCFS2_CONF, 'w')
        try:
            fd.write(conf)
        finally:
            # release the handle even when the write fails
            fd.close()

    def _addNode(self, name, nodeNum, ip, port, cluster, isOnline=True):
        """Register one node in `cluster` with o2cb_ctl unless it already exists.

        name, nodeNum, ip, port -- node attributes passed to o2cb_ctl
        cluster  -- name of the cluster the node is added to
        isOnline -- when true, o2cb_ctl is additionally given the -i flag

        An "already exists" failure (return code 239, or that text in stderr)
        is logged and ignored; any other command failure is re-raised.
        """
        import sys

        nodePath = '/sys/kernel/config/cluster/%s/node/%s'%(cluster, name)
        if exists(nodePath):
            logger.debug(OvmOCFS2._addNode, "node %s already exists, skip it(%s)"%(name, nodePath))
            return

        # Both variants share every argument; only -i differs.
        cmds = ['o2cb_ctl -C']
        if isOnline:
            cmds.append('-i')
        cmds.extend(['-n', name, '-t node', '-a number=%s'%nodeNum, '-a ip_address=%s'%ip,
                     '-a ip_port=%s'%port, '-a cluster=%s'%cluster])
        try:
            doCmd(cmds)
        except ShellExceutedFailedException:
            # sys.exc_info() keeps this handler parseable on every Python version
            e = sys.exc_info()[1]
            if e.errCode == 239 or "already exists" in e.stderr:
                logger.debug(OvmOCFS2._addNode, "node %s already exists, skip it(%s)"%(name, e.stderr))
            else:
                # bare raise preserves the original traceback
                raise

    def _isClusterOnline(self, cluster):
        """Return True when `service o2cb status <cluster>` reports it online.

        Only the first output line containing 'Checking O2CB cluster' is
        examined; the result is False when that line contains 'Offline' or
        when no such line is present.
        """
        res = doCmd(['service o2cb status', cluster])
        for line in res.split('\n'):
            if 'Checking O2CB cluster' in line:
                return 'Offline' not in line
        # no status line found: answer explicitly instead of falling off with None
        return False

    def _start(self, cluster):
        """Run `service o2cb` load, configure and start for `cluster`, in that order."""
        doCmd(['service o2cb load'])
        # Answers for `service o2cb configure`, supplied through a here-document;
        # a blank line stands for answering by pressing enter.
        config='''
y
o2cb
%s
EOF
'''%cluster
        doCmd(['service o2cb configure', '<<EOF', config])
        doCmd(['service o2cb start %s'%cluster])

View File

@ -3,6 +3,7 @@ from OVSSiteSR import sp_create, sr_create, sr_do
from OVSParser import parse_ocfs2_cluster_conf
from OVSXCluster import clusterm_set_ocfs2_cluster_conf, clusterm_start_o2cb_service
from OVSSiteRMServer import get_master_ip
from OvmOCFS2Module import OvmOCFS2
import re
class OvmStoragePoolDecoder(json.JSONDecoder):
@ -193,39 +194,7 @@ class OvmStoragePool(OvmObject):
logger.error(OvmStoragePool.downloadTemplate, 'unmount secondary storage at %s failed, %s'%(secMountPoint, errmsg))
@staticmethod
def prepareOCFS2Nodes(clusterName, nodeString):
def compareClusterConfig(nodes):
    """Return True when `nodes` matches the node list of the existing cluster.conf.

    nodes -- list of dicts with "number", "ip_address" and "name" keys.

    Returns False when OCFS2_CLUSTER_CONF does not exist, when the node count
    differs from the stored node_count, or when any node differs in
    ip_address, number or name once both lists are ordered by node number.
    """
    def sortNodes(nodes):
        # Order by numeric node number. list.insert(number, n) only produces
        # sorted output for some input orders (2,1,0 comes out as 0,2,1),
        # so sort explicitly.
        return sorted(nodes, key=lambda n: int(n["number"]))

    def compareNodes(ns1, ns2):
        if len(ns1) != len(ns2):
            return False
        for n1, n2 in zip(ns1, ns2):
            if n1["ip_address"] != n2["ip_address"] or n1["number"] != n2["number"] \
               or n1["name"] != n2["name"]:
                return False
        return True

    if not exists(OCFS2_CLUSTER_CONF):
        return False
    oldConf = parse_ocfs2_cluster_conf()
    nodesNum = oldConf["cluster"]["node_count"]
    if len(nodes) != nodesNum:
        return False
    return compareNodes(sortNodes(nodes), sortNodes(oldConf["nodes"]))
def prepareOCFS2Nodes(clusterName, nodeString):
def configureEtcHosts(nodes):
if not exists(ETC_HOSTS):
orignalConf = ""
@ -275,6 +244,15 @@ class OvmStoragePool(OvmObject):
fd.write(originalConf)
fd.close()
doCmd(['hostname', nodeName])
def addNodes(nodes, clusterName):
    """Add every node in `nodes` to the OCFS2 cluster `clusterName`.

    When the cluster is not reported online, a fresh cluster configuration
    is written first. Each node dict supplies 'name', 'number' and
    'ip_address'; port 7777 is used for every node.
    """
    manager = OvmOCFS2()
    online = manager._isClusterOnline(clusterName)
    if not online:
        manager._prepareConf(clusterName)
    for node in nodes:
        manager._addNode(node['name'], node['number'], node['ip_address'], 7777, clusterName, online)
def checkStaleCluster(clusterName):
if exists('/sys/kernel/config/cluster/'):
@ -287,7 +265,7 @@ class OvmStoragePool(OvmObject):
3) /etc/init.d/o2cb offline %s
4) /etc/init.d/o2cb restart
if this doesn't resolve the problem, please check oracle manual to see how to offline a cluster
''' % (dir, get_master_ip, dir)
''' % (dir, successToMap(get_master_ip())['ip'], dir)
raise Exception(errMsg)
try:
@ -303,31 +281,14 @@ if this doesn't resolve the problem, please check oracle manual to see how to of
if len(nodes) > 255:
raise Exception("%s nodes beyond maximum 255 allowed by OCFS2"%len(nodes))
if compareClusterConfig(nodes):
logger.debug(OvmStoragePool.prepareOCFS2Nodes, "Nodes configure are the same, return")
rs = SUCC()
return rs
lines = []
for n in nodes:
lines.append("node:\n")
lines.append("\tip_port = %s\n" % "7777")
lines.append("\tip_address = %s\n" % n["ip_address"])
lines.append("\tnumber = %s\n" % n["number"])
lines.append("\tname = %s\n" % n["name"])
lines.append("\tcluster = %s\n" % clusterName)
lines.append("\n")
lines.append("cluster:\n")
lines.append("\tnode_count = %d\n" % len(nodes))
lines.append("\tname = %s\n" % clusterName)
lines.append("\n")
conf = "".join(lines)
configureHostName(nodes)
configureEtcHosts(nodes)
clusterm_set_ocfs2_cluster_conf(conf)
clusterm_start_o2cb_service()
logger.debug(OvmStoragePool.prepareOCFS2Nodes, "Configure cluster.conf to:\n%s"%conf)
addNodes(nodes, clusterName)
OvmOCFS2()._start(clusterName)
fd = open(OCFS2_CONF, 'r')
conf = fd.readlines()
fd.close()
logger.debug(OvmStoragePool.prepareOCFS2Nodes, "Configure cluster.conf to:\n%s"%' '.join(conf))
rs = SUCC()
return rs