Revert "CLOUDSTACK-5923: CS doesn't do master switch for XS any more, CS will depend on XS HA to do master switch, XS HA needs to be enabled."

This reverts commit af5f3d5676.
This commit is contained in:
Anthony Xu 2014-01-22 15:07:39 -08:00
parent aa6f4dc8a7
commit 43d485c92d
3 changed files with 465 additions and 47 deletions

View File

@ -188,7 +188,8 @@ public class XcpServerDiscoverer extends DiscovererBase implements Discoverer, L
String hostIp = ia.getHostAddress();
Queue<String> pass=new LinkedList<String>();
pass.add(password);
conn = _connPool.getConnect(hostIp, username, pass);
String masterIp = _connPool.getMasterIp(hostIp, username, pass);
conn = _connPool.masterConnect(masterIp, username, pass);
if (conn == null) {
String msg = "Unable to get a connection to " + url;
s_logger.debug(msg);
@ -391,7 +392,7 @@ public class XcpServerDiscoverer extends DiscovererBase implements Discoverer, L
password = host.getDetail("password");
pass.add(password);
String address = host.getPrivateIpAddress();
Connection hostConn = _connPool.getConnect(address, username, pass);
Connection hostConn = _connPool.slaveConnect(address, username, pass);
if (hostConn == null) {
continue;
}
@ -405,7 +406,7 @@ public class XcpServerDiscoverer extends DiscovererBase implements Discoverer, L
s_logger.warn("Can not get master ip address from host " + address);
} finally {
try{
Session.logout(hostConn);
Session.localLogout(hostConn);
} catch (Exception e ) {
}
hostConn.dispose();

View File

@ -430,12 +430,23 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
}
/**
 * Probes the XenServer host at {@code _host.ip}.
 *
 * As shown, the probe calls the "echo"/"main" host plugin over the pooled
 * connection and then performs a slave-local login against the host's own URL.
 *
 * NOTE(review): this text comes from a rendered diff without +/- markers; the
 * getConnection()/callHostPlugin lines may belong to the removed side of the
 * change - confirm against the real file.
 *
 * @return true when every call in the try block completes without throwing;
 *         false when any of them throws (the failure is logged at debug level)
 */
protected boolean pingXenServer() {
Connection conn = getConnection();
Session slaveSession = null;
Connection slaveConn = null;
try {
callHostPlugin(conn, "echo", "main");
URL slaveUrl = null;
slaveUrl = _connPool.getURL(_host.ip);
slaveConn = new Connection(slaveUrl, 10);
slaveSession = _connPool.slaveLocalLoginWithPassword(slaveConn, _username, _password);
return true;
} catch (Exception e) {
s_logger.debug("cannot ping host " + _host.ip + " due to " + e.toString(), e);
} finally {
// Cleanup is keyed on the session, not the connection: a Connection that was
// created but whose login threw is not disposed by this block.
if( slaveSession != null ){
try{
Session.localLogout(slaveConn);
} catch (Exception e) {
// best effort: a failed logout must not mask the ping result
}
slaveConn.dispose();
}
}
return false;
}
@ -6053,9 +6064,9 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
}
private void CheckXenHostInfo() throws ConfigurationException {
Connection conn = _connPool.getConnect(_host.ip, _username, _password);
Connection conn = _connPool.slaveConnect(_host.ip, _username, _password);
if( conn == null ) {
throw new ConfigurationException("Can not create connection to " + _host.ip);
throw new ConfigurationException("Can not create slave connection to " + _host.ip);
}
try {
Host.Record hostRec = null;
@ -6075,7 +6086,7 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
}
} finally {
try {
Session.logout(conn);
Session.localLogout(conn);
} catch (Exception e) {
}
}
@ -7371,11 +7382,35 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
Host.Record hostr = poolr.master.getRecord(conn);
if (_host.uuid.equals(hostr.uuid)) {
boolean mastermigrated = false;
Map<Host, Host.Record> hostMap = Host.getAllRecords(conn);
if (hostMap.size() > 1) {
String msg = "This host is XS master, please designate a new XS master throught XenCenter before you delete this host from CS";
if (hostMap.size() != 1) {
Host newMaster = null;
Host.Record newMasterRecord = null;
for (Map.Entry<Host, Host.Record> entry : hostMap.entrySet()) {
if (_host.uuid.equals(entry.getValue().uuid)) {
continue;
}
newMaster = entry.getKey();
newMasterRecord = entry.getValue();
s_logger.debug("New master for the XenPool is " + newMasterRecord.uuid + " : " + newMasterRecord.address);
try {
_connPool.switchMaster(_host.ip, _host.pool, conn, newMaster, _username, _password, _wait);
mastermigrated = true;
break;
} catch (Exception e) {
s_logger.warn("Unable to switch the new master to " + newMasterRecord.uuid + ": " + newMasterRecord.address + " due to " + e.toString());
}
}
} else {
s_logger.debug("This is last host to eject, so don't need to eject: " + hostuuid);
return new Answer(cmd);
}
if ( !mastermigrated ) {
String msg = "this host is master, and cannot designate a new master";
s_logger.debug(msg);
return new Answer(cmd, false, msg);
}
}

View File

@ -54,6 +54,7 @@ public class XenServerConnectionPool {
protected HashMap<String /* poolUuid */, XenServerConnection> _conns = new HashMap<String, XenServerConnection>();
protected int _retries;
protected int _interval;
protected static boolean s_managePool = true;
protected static long s_sleepOnError = 10 * 1000; // in ms
static {
File file = PropertiesUtil.findConfigFile("environment.properties");
@ -66,11 +67,15 @@ public class XenServerConnectionPool {
final Properties props = new Properties();
props.load(finputstream);
finputstream.close();
String search = props.getProperty("sleep.interval.on.error");
String search = props.getProperty("manage.xenserver.pool.master");
if (search != null) {
s_managePool = Boolean.parseBoolean(search);
}
search = props.getProperty("sleep.interval.on.error");
if (search != null) {
s_sleepOnError = NumbersUtil.parseInterval(search, 10) * 1000;
}
s_logger.info("XenServer Connection Pool Configs: sleep.interval.on.error=" + s_sleepOnError);
s_logger.info("XenServer Connection Pool Configs: manage.xenserver.pool.master=" + s_managePool + "; sleep.interval.on.error=" + s_sleepOnError);
} catch (FileNotFoundException e) {
s_logger.debug("File is not found", e);
} catch (IOException e) {
@ -207,29 +212,301 @@ public class XenServerConnectionPool {
return false;
}
public Connection getConnect(String ip, String username, Queue<String> password) {
Connection conn = new Connection(getURL(ip), 10);
/**
 * Designates {@code host} as the new pool master and waits for the switch to
 * become visible from {@code slaveIp}.
 *
 * After calling {@code Pool.designateNewMaster}, the method polls up to 30
 * times, pausing via {@code forceSleep(5)} before each attempt. An attempt
 * logs in locally on the slave, reads the pool record, and compares the
 * reported master address with the new master's address. Once they match it
 * logs in on the new master, drops the cached pool connection and runs
 * {@code ensurePoolIntegrity}. Every attempt releases its connections.
 * The whole operation runs while holding the monitor of
 * {@code poolUuid.intern()}.
 *
 * @param slaveIp  address used for the slave-local verification login
 * @param poolUuid pool identifier; also the lock key and cache key
 * @param conn     existing connection used to designate the new master
 * @param host     host that should become master
 * @param username login name
 * @param password candidate passwords
 * @param wait     passed through to {@code ensurePoolIntegrity}
 * @throws CloudRuntimeException when the new master cannot be reached after all attempts
 */
public void switchMaster(String slaveIp, String poolUuid,
        Connection conn, Host host, String username, Queue<String> password,
        int wait) throws XmlRpcException, XenAPIException {
    final int maxAttempts = 30;
    synchronized (poolUuid.intern()) {
        final String masterIp = host.getAddress(conn);
        s_logger.debug("Designating the new master to " + masterIp);
        Pool.designateNewMaster(conn, host);

        int attempt = 0;
        while (attempt < maxAttempts) {
            attempt++;
            forceSleep(5);
            Connection slaveConn = null;
            Connection masterConn = null;
            try {
                if (s_logger.isDebugEnabled()) {
                    s_logger.debug("Logging on as the slave to " + slaveIp);
                }
                slaveConn = new Connection(getURL(slaveIp), 10);
                Session slaveSession = slaveLocalLoginWithPassword(slaveConn, username, password);
                if (s_logger.isDebugEnabled()) {
                    s_logger.debug("Slave logon successful. session= "
                            + slaveSession);
                }

                Pool.Record poolRecord = getPoolRecord(slaveConn);
                String reportedMaster = poolRecord.master.getAddress(slaveConn);
                if (reportedMaster.trim().equals(masterIp.trim())) {
                    // The slave already sees the new master: finish on the master side.
                    s_logger.debug("Logging on as the master to " + masterIp);
                    masterConn = new Connection(getURL(masterIp), 10);
                    loginWithPassword(masterConn, username, password, APIVersion.latest().toString());
                    removeConnect(poolUuid);
                    ensurePoolIntegrity(masterConn, masterIp, username, password, wait);
                    return;
                }
                // Otherwise fall through to cleanup and try again.
            } catch (Types.HostIsSlave e) {
                s_logger.debug("HostIsSlaveException: Still waiting for the conversion to the master");
            } catch (XmlRpcException e) {
                s_logger.debug("XmlRpcException: Still waiting for the conversion to the master " + e.getMessage());
            } catch (Exception e) {
                s_logger.debug("Exception: Still waiting for the conversion to the master" + e.getMessage());
            } finally {
                if (masterConn != null) {
                    try {
                        Session.logout(masterConn);
                    } catch (Exception logoutFailure) {
                        s_logger.debug("Unable to log out of session: "
                                + logoutFailure.getMessage());
                    }
                    masterConn.dispose();
                }
                localLogout(slaveConn);
            }
        }
        throw new CloudRuntimeException(
                "Unable to logon to the new master after " + maxAttempts + " retries");
    }
}
/**
 * Best-effort slave-local logout followed by disposal of the connection.
 * A {@code null} connection is ignored; a failing logout is logged at debug
 * level and the connection is still disposed.
 *
 * @param conn connection to release, may be {@code null}
 */
private void localLogout(Connection conn) {
    if (conn != null) {
        try {
            if (s_logger.isTraceEnabled()) {
                s_logger.trace("Logging out of the session "
                        + conn.getSessionReference());
            }
            Session.localLogout(conn);
        } catch (Exception logoutFailure) {
            s_logger.debug("localLogout has problem " + logoutFailure.getMessage());
        } finally {
            // Always release the connection, even if the logout call failed.
            conn.dispose();
        }
    }
}
/**
 * Opens a connection to {@code ip} and performs a slave-local login on it.
 *
 * @param ip       address of the host to log in to
 * @param username login name
 * @param password candidate passwords
 * @return the logged-in connection, or {@code null} when the connection could
 *         not be created or the login failed (the cause is logged at debug level)
 */
public Connection slaveConnect(String ip, String username, Queue<String> password) {
    Connection conn = null;
    try {
        conn = new Connection(getURL(ip), 10);
        slaveLocalLoginWithPassword(conn, username, password);
        return conn;
    } catch (Exception e) {
        s_logger.debug("Failed to slave local login to " + ip + " due to " + e.toString());
        // The caller only ever sees null on failure, so release the half-open
        // connection here instead of abandoning it.
        if (conn != null) {
            try {
                conn.dispose();
            } catch (Exception disposeFailure) {
                s_logger.debug("Unable to dispose connection to " + ip + " due to " + disposeFailure.toString());
            }
        }
    }
    return null;
}
/**
 * Opens a connection to {@code ip} and logs in with
 * {@code loginWithPassword} using the latest API version string.
 *
 * The last statement throws a RuntimeException ("can not log in to master")
 * for any path that reaches it.
 *
 * NOTE(review): this text comes from a rendered diff without +/- markers.
 * The HostIsSlave handler below (retry against {@code e.masterIPAddress})
 * does not nest consistently with the following {@code return conn;} and
 * generic catch, so it looks like lines from the removed method interleaved
 * with the new one - confirm against the real file before relying on it.
 */
public Connection masterConnect(String ip, String username, Queue<String> password) {
Connection conn = null;
try{
conn = new Connection(getURL(ip), 10);
s_logger.debug("Logging on as the master to " + ip);
loginWithPassword(conn, username, password, APIVersion.latest().toString());
} catch (Types.HostIsSlave e) {
// Retry against the master address carried by the exception.
String maddress = e.masterIPAddress;
conn = new Connection(getURL(maddress), 10);
try {
loginWithPassword(conn, username, password, APIVersion.latest().toString());
} catch (Exception e1) {
String msg = "Unable to create master connection to host(" + maddress +") , due to " + e1.toString();
s_logger.debug(msg);
throw new CloudRuntimeException(msg, e1);
return conn;
}catch ( Exception e){
// NOTE(review): the message says "slave local login" although this method
// performs a master login - confirm whether the wording is intended.
s_logger.debug("Failed to slave local login to " + ip);
}
throw new RuntimeException("can not log in to master " + ip);
}
/**
 * Asks the host at {@code ip} which address its pool master has.
 *
 * The method logs in locally on the host, reads the pool record and returns
 * the address of the record's master. The temporary connection is always
 * released before returning or throwing.
 *
 * @param ip       address of any host in the pool
 * @param username login name
 * @param password candidate passwords
 * @return the master address reported by the host
 * @throws XenAPIException  when the login is rejected
 *                          ({@code Types.SessionAuthenticationFailed} is rethrown as is)
 * @throws RuntimeException for any other failure; the original exception is
 *                          attached as the cause
 */
public String getMasterIp(String ip, String username, Queue<String> password) throws XenAPIException {
    Connection slaveConn = null;
    try {
        slaveConn = new Connection(getURL(ip), 10);
        slaveLocalLoginWithPassword(slaveConn, username, password);
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Slave logon to " + ip);
        }
        Pool.Record pr = getPoolRecord(slaveConn);
        Host master = pr.master;
        return master.getAddress(slaveConn);
    } catch (Types.SessionAuthenticationFailed e) {
        // Wrong credentials are reported unchanged so callers can tell them apart.
        s_logger.debug("Failed to slave local login to " + ip + " due to " + e.toString());
        throw e;
    } catch (Exception e) {
        s_logger.debug("Failed to slave local login to " + ip + " due to " + e.toString());
        // Same type and message as before, but keep the root cause attached.
        throw new RuntimeException("can not get master ip", e);
    } finally {
        localLogout(slaveConn);
    }
}
/**
 * Forces the host at {@code slaveIp} to become pool master and waits until a
 * regular (master) login on it succeeds.
 *
 * When pool management is disabled ({@code s_managePool} is false) nothing is
 * changed on the pool; the method only pauses for {@code s_sleepOnError}
 * milliseconds and returns. The pause no longer depends on debug logging
 * being enabled.
 *
 * Otherwise it logs in locally on the host, calls
 * {@code Pool.emergencyTransitionToMaster}, pauses via {@code forceSleep(10)},
 * and then tries a master login up to 30 times with {@code forceSleep(2)}
 * between attempts.
 *
 * @param slaveIp  address of the host to promote
 * @param username login name
 * @param password candidate passwords
 * @throws RuntimeException when the transition fails or the host does not
 *                          accept a master login after 30 attempts; the
 *                          underlying exception is attached as the cause
 */
void PoolEmergencyTransitionToMaster(String slaveIp, String username, Queue<String> password) {
    if (!s_managePool) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Don't manage pool on error so sleeping for " + s_sleepOnError);
        }
        // The back-off must happen regardless of the log level.
        try {
            Thread.sleep(s_sleepOnError);
        } catch (InterruptedException ignored) {
            // an interruption only cuts the back-off short
        }
        return;
    }
    Connection slaveConn = null;
    Connection c = null;
    try {
        s_logger.debug("Trying to transition master to " + slaveIp);
        slaveConn = new Connection(getURL(slaveIp), 10);
        slaveLocalLoginWithPassword(slaveConn, username, password);
        Pool.emergencyTransitionToMaster(slaveConn);
        // restart xapi in 10 sec
        forceSleep(10);
        // check if the master of this host is set correctly.
        c = new Connection(getURL(slaveIp), 10);
        for (int i = 0; i < 30; i++) {
            try {
                loginWithPassword(c, username, password, APIVersion.latest().toString());
                s_logger.debug("Succeeded to transition master to " + slaveIp);
                return;
            } catch (Types.HostIsSlave e) {
                s_logger.debug("HostIsSlave: Still waiting for the conversion to the master " + slaveIp);
            } catch (Exception e) {
                s_logger.debug("Exception: Still waiting for the conversion to the master");
            }
            forceSleep(2);
        }
        throw new RuntimeException("EmergencyTransitionToMaster failed after retry 30 times");
    } catch (Exception e) {
        throw new RuntimeException("EmergencyTransitionToMaster failed due to " + e.getMessage(), e);
    } finally {
        localLogout(slaveConn);
        if (c != null) {
            try {
                Session.logout(c);
            } catch (Exception logoutFailure) {
                s_logger.debug("Unable to log out of session: " + logoutFailure.getMessage());
            } finally {
                // Dispose even when the logout call throws.
                c.dispose();
            }
        }
    }
}
/**
 * Points the host at {@code slaveIp} to {@code masterIp} as its pool master
 * and waits until the host reports that address.
 *
 * When pool management is disabled ({@code s_managePool} is false) nothing is
 * changed on the pool; the method only pauses for {@code s_sleepOnError}
 * milliseconds and returns. The pause no longer depends on debug logging
 * being enabled.
 *
 * Otherwise it logs in locally on the host, calls
 * {@code Pool.emergencyResetMaster}, pauses via {@code forceSleep(10)}, and
 * then checks up to 30 times (with {@code forceSleep(2)} in between) whether
 * the host's pool record names {@code masterIp} as master. Every check uses
 * its own connection, so a failed or non-matching check no longer leaves the
 * following checks without a connection.
 *
 * @param slaveIp  address of the host to re-point
 * @param masterIp address the host should use as master
 * @param username login name
 * @param password candidate passwords
 * @throws CloudRuntimeException when the reset fails or is not confirmed
 *                               after 30 checks; the cause is attached
 */
private void PoolEmergencyResetMaster(String slaveIp, String masterIp, String username, Queue<String> password) {
    if (!s_managePool) {
        if (s_logger.isDebugEnabled()) {
            s_logger.debug("Don't manage pool on error so sleeping for " + s_sleepOnError);
        }
        // The back-off must happen regardless of the log level.
        try {
            Thread.sleep(s_sleepOnError);
        } catch (InterruptedException ignored) {
            // an interruption only cuts the back-off short
        }
        return;
    }
    Connection slaveConn = null;
    try {
        s_logger.debug("Trying to reset master of slave " + slaveIp
                + " to " + masterIp);
        slaveConn = new Connection(getURL(slaveIp), 10);
        slaveLocalLoginWithPassword(slaveConn, username, password);
        Pool.emergencyResetMaster(slaveConn, masterIp);
        forceSleep(10);
        for (int i = 0; i < 30; i++) {
            // Fresh connection per check; it is released at the end of the check.
            Connection checkConn = null;
            try {
                checkConn = new Connection(getURL(slaveIp), 10);
                slaveLocalLoginWithPassword(checkConn, username, password);
                Pool.Record pr = getPoolRecord(checkConn);
                String mIp = pr.master.getAddress(checkConn);
                if (mIp.trim().equals(masterIp.trim())) {
                    s_logger.debug("Succeeded to reset master of slave " + slaveIp + " to " + masterIp);
                    return;
                }
            } catch (Exception e) {
                s_logger.debug("Still waiting for slave " + slaveIp + " to pick up master " + masterIp + ": " + e.toString());
            } finally {
                localLogout(checkConn);
            }
            // wait 2 second
            forceSleep(2);
        }
        throw new CloudRuntimeException("Unable to reset master of slave " + slaveIp
                + " to " + masterIp + " after 30 retry");
    } catch (Exception e) {
        throw new CloudRuntimeException("Unable to reset master of slave " + slaveIp
                + " to " + masterIp + " due to " + e.toString(), e);
    } finally {
        localLogout(slaveConn);
    }
}
/**
 * Tries to get every host of the pool to agree that {@code masterIp} is the
 * pool master. Works in two passes over the master connection {@code conn}:
 *
 * 1. Calls {@code Pool.recoverSlaves(conn)} and, for each host it returns,
 *    polls up to 30 times (2 s apart) until a slave-local login on that host
 *    reports {@code masterIp} as master.
 * 2. Walks all hosts from {@code Host.getAll(conn)}; any host whose pool
 *    record names a different master is handed to
 *    {@code PoolEmergencyResetMaster}.
 *
 * The {@code wait} parameter is not referenced in the lines shown.
 *
 * NOTE(review): this text comes from a rendered diff without +/- markers.
 * In the outer catch, the three lines building/throwing "Unable to create
 * master connection" (they reference {@code ip}) and the trailing
 * {@code return conn;} do not fit this void method and look like leftovers
 * of the removed method - confirm against the real file.
 */
protected void ensurePoolIntegrity(Connection conn,
String masterIp, String username, Queue<String> password, int wait) {
try {
// try recoverSlave first
Set<Host> rcSlaves = Pool.recoverSlaves(conn);
// wait 10 second
forceSleep(10);
for(Host slave : rcSlaves ) {
for (int i = 0; i < 30; i++) {
Connection slaveConn = null;
try {
String slaveIp = slave.getAddress(conn);
s_logger.debug("Logging on as the slave to " + slaveIp);
slaveConn = new Connection(getURL(slaveIp), 10);
slaveLocalLoginWithPassword(slaveConn, username, password);
Pool.Record pr = getPoolRecord(slaveConn);
String mIp = pr.master.getAddress(slaveConn);
if (mIp.trim().equals(masterIp.trim())) {
// this slave already follows the expected master; stop polling it
break;
}
} catch (Exception e) {
// failures are ignored here; the loop simply tries again after the pause
} finally {
localLogout(slaveConn);
slaveConn = null;
}
// wait 2 second
forceSleep(2);
}
}
// then try emergency reset master
Set<Host> slaves = Host.getAll(conn);
for (Host slave : slaves) {
String slaveIp = slave.getAddress(conn);
Connection slaveConn = null;
try {
s_logger.debug("Logging on as the slave to " + slaveIp);
slaveConn = new Connection(getURL(slaveIp), 10);
slaveLocalLoginWithPassword(slaveConn, username, password);
Pool.Record slavePoolr = getPoolRecord(slaveConn);
String ip = slavePoolr.master.getAddress(slaveConn);
if (!masterIp.trim().equals(ip.trim())) {
PoolEmergencyResetMaster(slaveIp, masterIp, username, password);
}
} catch (Exception e) {
s_logger.debug("Unable to login to slave " + slaveIp + " error " + e.getMessage());
} finally {
localLogout(slaveConn);
slaveConn = null;
}
}
} catch (Exception e) {
String msg = "Unable to create master connection to host(" + ip +") , due to " + e.toString();
s_logger.debug(msg);
throw new CloudRuntimeException(msg, e);
if (s_logger.isDebugEnabled()) {
s_logger.debug("Catch " + e.getClass().getName() + " due to " + e.toString());
}
}
return conn;
}
public URL getURL(String ip){
try {
return new URL("https://" + ip);
@ -245,18 +522,39 @@ public class XenServerConnectionPool {
public Connection connect(String hostUuid, String poolUuid, String ipAddress,
String username, Queue<String> password, int wait) {
XenServerConnection mConn = null;
Connection sConn = null;
String masterIp = null;
if (hostUuid == null || poolUuid == null || ipAddress == null || username == null || password == null) {
String msg = "Connect some parameter are null hostUuid:" + hostUuid + " ,poolUuid:" + poolUuid
+ " ,ipAddress:" + ipAddress;
s_logger.debug(msg);
throw new CloudRuntimeException(msg);
}
Host host = null;
synchronized (poolUuid.intern()) {
// Let's see if it is an existing connection.
mConn = getConnect(poolUuid);
if (mConn != null){
try{
Host.getByUuid(mConn, hostUuid);
host = Host.getByUuid(mConn, hostUuid);
} catch (Types.SessionInvalid e) {
s_logger.debug("Session thgrough ip " + mConn.getIp() + " is invalid for pool(" + poolUuid + ") due to " + e.toString());
try {
loginWithPassword(mConn, mConn.getUsername(), mConn.getPassword(), APIVersion.latest().toString());
} catch (Exception e1) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("connect through IP(" + mConn.getIp() + " for pool(" + poolUuid + ") is broken due to " + e.toString());
}
removeConnect(poolUuid);
mConn = null;
}
} catch (UuidInvalid e) {
String msg = "Host(" + hostUuid + ") doesn't belong to pool(" + poolUuid + "), please execute 'xe pool-join master-address=" + mConn.getIp()
+ " master-username=" + mConn.getUsername();
if (s_logger.isDebugEnabled()) {
s_logger.debug(msg);
}
throw new CloudRuntimeException(msg, e);
} catch (Exception e) {
if (s_logger.isDebugEnabled()) {
s_logger.debug("connect through IP(" + mConn.getIp() + " for pool(" + poolUuid + ") is broken due to " + e.toString());
@ -267,29 +565,113 @@ public class XenServerConnectionPool {
}
if ( mConn == null ) {
mConn = new XenServerConnection(getURL(ipAddress), ipAddress, username, password, _retries, _interval, wait);
try {
loginWithPassword(mConn, username, password, APIVersion.latest().toString());
} catch (Types.HostIsSlave e) {
String maddress = e.masterIPAddress;
mConn = new XenServerConnection(getURL(maddress), maddress, username, password, _retries, _interval, wait);
try {
if (s_logger.isDebugEnabled()) {
s_logger.debug("Logging on as the slave to " + ipAddress);
}
sConn = new Connection(getURL(ipAddress), 5);
slaveLocalLoginWithPassword(sConn, username, password);
} catch (Exception e){
String msg = "Unable to create slave connection to host(" + hostUuid +") due to " + e.toString();
if (s_logger.isDebugEnabled()) {
s_logger.debug(msg);
}
throw new CloudRuntimeException(msg, e);
}
Pool.Record pr = null;
try {
pr = getPoolRecord(sConn);
} catch (Exception e) {
PoolEmergencyTransitionToMaster(ipAddress, username, password);
mConn = new XenServerConnection(getURL(ipAddress), ipAddress, username, password, _retries, _interval, wait);
try {
loginWithPassword(mConn, username, password, APIVersion.latest().toString());
pr = getPoolRecord(mConn);
} catch (Exception e1) {
String msg = "Unable to create master connection to host(" + hostUuid +") after transition it to master, due to " + e1.toString();
if (s_logger.isDebugEnabled()) {
s_logger.debug(msg);
}
throw new CloudRuntimeException(msg, e1);
}
if ( !pr.uuid.equals(poolUuid) ) {
String msg = "host(" + hostUuid +") should be in pool(" + poolUuid + "), but it is actually in pool(" + pr.uuid + ")";
if (s_logger.isDebugEnabled()) {
s_logger.debug(msg);
}
throw new CloudRuntimeException(msg);
} else {
if (s_managePool) {
ensurePoolIntegrity(mConn, ipAddress, username, password, wait);
}
addConnect(poolUuid, mConn);
return mConn;
}
}
if ( !pr.uuid.equals(poolUuid) ) {
String msg = "host(" + hostUuid +") should be in pool(" + poolUuid + "), but it is actually in pool(" + pr.uuid + ")";
if (s_logger.isDebugEnabled()) {
s_logger.debug(msg);
}
throw new CloudRuntimeException(msg);
}
try {
masterIp = pr.master.getAddress(sConn);
mConn = new XenServerConnection(getURL(masterIp), masterIp, username, password, _retries, _interval, wait);
loginWithPassword(mConn, username, password, APIVersion.latest().toString());
} catch (Exception e1) {
String msg = "Unable to create master connection to host(" + maddress +") , due to " + e1.toString();
s_logger.debug(msg);
throw new CloudRuntimeException(msg, e1);
}
} catch (Exception e) {
String msg = "Unable to create master connection to host(" + ipAddress +") , due to " + e.toString();
s_logger.debug(msg);
throw new CloudRuntimeException(msg, e);
addConnect(poolUuid, mConn);
return mConn;
} catch (Exception e) {
String msg = "Unable to logon in " + masterIp + " as master in pool(" + poolUuid + ")";
if (s_logger.isDebugEnabled()) {
s_logger.debug(msg);
}
throw new CloudRuntimeException(msg);
}
} finally {
localLogout(sConn);
sConn = null;
}
addConnect(poolUuid, mConn);
}
}
if ( mConn != null ) {
if (s_managePool) {
try {
Map<String, String> args = new HashMap<String, String>();
host.callPlugin(mConn, "echo", "main", args);
} catch (Types.SessionInvalid e) {
if (s_logger.isDebugEnabled()) {
String msg = "Catch Exception: " + e.getClass().getName() + " Can't connect host " + ipAddress + " due to " + e.toString();
s_logger.debug(msg);
}
PoolEmergencyResetMaster(ipAddress, mConn.getIp(), mConn.getUsername(), mConn.getPassword());
} catch (Types.CannotContactHost e ) {
if (s_logger.isDebugEnabled()) {
String msg = "Catch Exception: " + e.getClass().getName() + " Can't connect host " + ipAddress + " due to " + e.toString();
s_logger.debug(msg);
}
PoolEmergencyResetMaster(ipAddress, mConn.getIp(), mConn.getUsername(), mConn.getPassword());
} catch (Types.HostOffline e ) {
if (s_logger.isDebugEnabled()) {
String msg = "Catch Exception: " + e.getClass().getName() + " Host is offline " + ipAddress + " due to " + e.toString();
s_logger.debug(msg);
}
PoolEmergencyResetMaster(ipAddress, mConn.getIp(), mConn.getUsername(), mConn.getPassword());
} catch (Types.HostNotLive e ) {
String msg = "Catch Exception: " + e.getClass().getName() + " Host Not Live " + ipAddress + " due to " + e.toString();
if (s_logger.isDebugEnabled()) {
s_logger.debug(msg);
}
PoolEmergencyResetMaster(ipAddress, mConn.getIp(), mConn.getUsername(), mConn.getPassword());
} catch (Exception e) {
String msg = "Echo test failed on host " + hostUuid + " IP " + ipAddress;
s_logger.warn(msg, e);
throw new CloudRuntimeException(msg, e);
}
}
}
return mConn;
}