CKS: Externalize control and worker node setup wait time and installation attempts

This commit is contained in:
Pearl Dsilva 2024-04-10 10:06:40 -04:00 committed by nvazquez
parent 67c75e13c7
commit 535219897d
No known key found for this signature in database
GPG Key ID: 656E1BCC8CB54F84
7 changed files with 145 additions and 7 deletions

View File

@ -2288,7 +2288,11 @@ public class KubernetesClusterManagerImpl extends ManagerBase implements Kuberne
KubernetesClusterUpgradeRetries,
KubernetesClusterAddNodeTimeout,
KubernetesClusterExperimentalFeaturesEnabled,
KubernetesMaxClusterSize
KubernetesMaxClusterSize,
KubernetesControlNodeInstallAttemptWait,
KubernetesControlNodeInstallReattempts,
KubernetesWorkerNodeInstallAttemptWait,
KubernetesWorkerNodeInstallReattempts
};
}
}

View File

@ -105,6 +105,30 @@ public interface KubernetesClusterService extends PluggableService, Configurable
true,
ConfigKey.Scope.Account,
KubernetesServiceEnabled.key());
/**
 * Wait, in seconds, between installation attempts on a control node (default 15).
 * Read when the control node configuration is rendered and substituted for the
 * {@code k8s.install.wait.time} placeholder.
 */
static final ConfigKey<Long> KubernetesControlNodeInstallAttemptWait = new ConfigKey<Long>("Advanced", Long.class,
        "cloud.kubernetes.control.node.install.attempt.wait.duration",
        "15",
        "Time in seconds for the installation process to wait before it re-attempts",
        true,
        KubernetesServiceEnabled.key());
/**
 * Number of times the offline installation is re-attempted on a control node (default 100).
 * Substituted for the {@code k8s.install.reattempts.count} placeholder.
 */
static final ConfigKey<Long> KubernetesControlNodeInstallReattempts = new ConfigKey<Long>("Advanced", Long.class,
        "cloud.kubernetes.control.node.install.reattempt.count",
        "100",
        "Number of times the offline installation of K8S will be re-attempted",
        true,
        KubernetesServiceEnabled.key());
/**
 * Wait, in seconds, between installation attempts on a worker node (default 30).
 * Declared {@code static final} explicitly to match the sibling keys; interface
 * fields are implicitly static, so this does not change behavior.
 */
static final ConfigKey<Long> KubernetesWorkerNodeInstallAttemptWait = new ConfigKey<Long>("Advanced", Long.class,
        "cloud.kubernetes.worker.node.install.attempt.wait.duration",
        "30",
        "Time in seconds for the installation process to wait before it re-attempts",
        true,
        KubernetesServiceEnabled.key());
/**
 * Number of times the offline installation is re-attempted on a worker node (default 40).
 */
static final ConfigKey<Long> KubernetesWorkerNodeInstallReattempts = new ConfigKey<Long>("Advanced", Long.class,
        "cloud.kubernetes.worker.node.install.reattempt.count",
        "40",
        "Number of times the offline installation of K8S will be re-attempted",
        true,
        KubernetesServiceEnabled.key());
/**
 * Looks up a Kubernetes cluster by its identifier.
 * NOTE(review): the result for an unknown id (null vs. exception) is not visible
 * from this declaration — confirm against the implementation.
 *
 * @param id identifier of the cluster to look up
 * @return the matching cluster
 */
KubernetesCluster findById(final Long id);

View File

@ -36,14 +36,19 @@ import java.util.stream.Collectors;
import javax.inject.Inject;
import com.cloud.kubernetes.cluster.KubernetesClusterDetailsVO;
import com.cloud.kubernetes.cluster.KubernetesClusterHelper.KubernetesClusterNodeType;
import com.cloud.kubernetes.cluster.KubernetesClusterService;
import com.cloud.kubernetes.cluster.utils.KubernetesClusterUtil;
import com.cloud.network.rules.FirewallManager;
import com.cloud.network.rules.RulesService;
import com.cloud.network.rules.dao.PortForwardingRulesDao;
import com.cloud.offering.NetworkOffering;
import com.cloud.offerings.dao.NetworkOfferingDao;
import com.cloud.user.SSHKeyPairVO;
import com.cloud.utils.db.TransactionCallbackWithException;
import com.cloud.utils.net.Ip;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseCmd;
import org.apache.cloudstack.api.command.user.firewall.CreateFirewallRuleCmd;
import org.apache.cloudstack.api.command.user.network.CreateNetworkACLCmd;
@ -166,6 +171,79 @@ public class KubernetesClusterResourceModifierActionWorker extends KubernetesClu
kubernetesClusterNodeNamePrefix = getKubernetesClusterNodeNamePrefix();
}
/**
 * Renders the node cloud-init configuration from the {@code /conf/k8s-node.yml} template.
 *
 * Fills in the SSH public keys (the global {@code ssh.publickey} plus the cluster's own
 * key pair when one is found), the join IP, the cluster token, the eject-ISO flag and the
 * worker node install wait/re-attempt settings, then applies any registry details.
 *
 * @param joinIp   address substituted for the join-IP placeholder
 * @param ejectIso value substituted for the eject-ISO placeholder
 * @return the fully rendered configuration
 * @throws IOException if the template resource cannot be read
 */
private String getKubernetesNodeConfig(final String joinIp, final boolean ejectIso) throws IOException {
    final String template = readResourceFile("/conf/k8s-node.yml");

    final Long installAttemptWait = KubernetesClusterService.KubernetesWorkerNodeInstallAttemptWait.value();
    final Long installReattempts = KubernetesClusterService.KubernetesWorkerNodeInstallReattempts.value();

    // YAML list of authorized keys: the management server key always comes first.
    final StringBuilder authorizedKeys = new StringBuilder("- \"")
            .append(configurationDao.getValue("ssh.publickey"))
            .append("\"");
    final String keyPairName = kubernetesCluster.getKeyPair();
    if (StringUtils.isNotEmpty(keyPairName)) {
        final SSHKeyPairVO userKeyPair = sshKeyPairDao.findByName(owner.getAccountId(), owner.getDomainId(), keyPairName);
        if (userKeyPair != null) {
            authorizedKeys.append("\n - \"").append(userKeyPair.getPublicKey()).append("\"");
        }
    }

    // Placeholders are substituted in a fixed order; each step works on the previous result.
    final String rendered = template
            .replace("{{ k8s.ssh.pub.key }}", authorizedKeys.toString())
            .replace("{{ k8s_control_node.join_ip }}", joinIp)
            .replace("{{ k8s_control_node.cluster.token }}", KubernetesClusterUtil.generateClusterToken(kubernetesCluster))
            .replace("{{ k8s.eject.iso }}", String.valueOf(ejectIso))
            .replace("{{ k8s.install.wait.time }}", String.valueOf(installAttemptWait))
            .replace("{{ k8s.install.reattempts.count }}", String.valueOf(installReattempts));

    return updateKubeConfigWithRegistryDetails(rendered);
}
/**
 * Injects private registry settings into a node cloud-init configuration.
 *
 * Reads the registry user name, password and URL from the cluster details. Only when all
 * three are non-empty is the configuration changed: the default containerd config command
 * is swapped for the {@code setup-containerd} script and the registry placeholders are
 * filled in. Otherwise the configuration is returned untouched.
 *
 * @param k8sConfig cloud-init configuration to update
 * @return the updated configuration, or {@code k8sConfig} unchanged when registry details are incomplete
 */
protected String updateKubeConfigWithRegistryDetails(String k8sConfig) {
    /* generate /etc/containerd/config.toml file on the nodes only if Kubernetes cluster is created to
     * use docker private registry */
    String registryUsername = null;
    String registryPassword = null;
    String registryUrl = null;

    List<KubernetesClusterDetailsVO> details = kubernetesClusterDetailsDao.listDetails(kubernetesCluster.getId());
    for (KubernetesClusterDetailsVO detail : details) {
        // Constant-first comparison so a detail row without a name is skipped instead of throwing.
        final String detailName = detail.getName();
        if (ApiConstants.DOCKER_REGISTRY_USER_NAME.equals(detailName)) {
            registryUsername = detail.getValue();
        }
        if (ApiConstants.DOCKER_REGISTRY_PASSWORD.equals(detailName)) {
            registryPassword = detail.getValue();
        }
        if (ApiConstants.DOCKER_REGISTRY_URL.equals(detailName)) {
            registryUrl = detail.getValue();
        }
    }

    if (!StringUtils.isNoneEmpty(registryUsername, registryPassword, registryUrl)) {
        return k8sConfig;
    }

    // Update runcmd in the cloud-init configuration to run a script that updates the containerd config with provided registry details
    String runCmd = "- bash -x /opt/bin/setup-containerd";

    // The endpoint is the part after the scheme separator. A URL stored without "://"
    // previously caused an ArrayIndexOutOfBoundsException; fall back to the whole URL.
    final String[] urlParts = registryUrl.split("://");
    final String registryEp = urlParts.length > 1 ? urlParts[1] : registryUrl;

    k8sConfig = k8sConfig.replace("- containerd config default > /etc/containerd/config.toml", runCmd);

    final String registryUrlKey = "{{registry.url}}";
    final String registryUrlEpKey = "{{registry.url.endpoint}}";
    final String registryAuthKey = "{{registry.token}}";
    final String registryUname = "{{registry.username}}";
    final String registryPsswd = "{{registry.password}}";

    // The registry token is the Base64 encoding of "username:password".
    final String usernamePasswordKey = registryUsername + ":" + registryPassword;
    String base64Auth = Base64.encodeBase64String(usernamePasswordKey.getBytes(com.cloud.utils.StringUtils.getPreferredCharset()));

    k8sConfig = k8sConfig.replace(registryUrlKey, registryUrl);
    k8sConfig = k8sConfig.replace(registryUrlEpKey, registryEp);
    k8sConfig = k8sConfig.replace(registryUname, registryUsername);
    k8sConfig = k8sConfig.replace(registryPsswd, registryPassword);
    k8sConfig = k8sConfig.replace(registryAuthKey, base64Auth);
    return k8sConfig;
}
protected DeployDestination plan(final long nodesCount, final DataCenter zone, final ServiceOffering offering) throws InsufficientServerCapacityException {
final int cpu_requested = offering.getCpu() * offering.getSpeed();
final long ram_requested = offering.getRamSize() * 1024L * 1024L;

View File

@ -141,6 +141,9 @@ public class KubernetesClusterStartWorker extends KubernetesClusterResourceModif
final String clusterToken = "{{ k8s_control_node.cluster.token }}";
final String clusterInitArgsKey = "{{ k8s_control_node.cluster.initargs }}";
final String ejectIsoKey = "{{ k8s.eject.iso }}";
final String installWaitTime = "{{ k8s.install.wait.time }}";
final String installReattemptsCount = "{{ k8s.install.reattempts.count }}";
final List<String> addresses = new ArrayList<>();
addresses.add(controlNodeIp);
if (!serverIp.equals(controlNodeIp)) {
@ -152,6 +155,8 @@ public class KubernetesClusterStartWorker extends KubernetesClusterResourceModif
final String tlsClientCert = CertUtils.x509CertificateToPem(certificate.getClientCertificate());
final String tlsPrivateKey = CertUtils.privateKeyToPem(certificate.getPrivateKey());
final String tlsCaCert = CertUtils.x509CertificatesToPem(certificate.getCaCertificates());
final Long waitTime = KubernetesClusterService.KubernetesControlNodeInstallAttemptWait.value();
final Long reattempts = KubernetesClusterService.KubernetesControlNodeInstallReattempts.value();
k8sControlNodeConfig = k8sControlNodeConfig.replace(apiServerCert, tlsClientCert.replace("\n", "\n "));
k8sControlNodeConfig = k8sControlNodeConfig.replace(apiServerKey, tlsPrivateKey.replace("\n", "\n "));
k8sControlNodeConfig = k8sControlNodeConfig.replace(caCert, tlsCaCert.replace("\n", "\n "));
@ -163,6 +168,8 @@ public class KubernetesClusterStartWorker extends KubernetesClusterResourceModif
pubKey += "\n - \"" + sshkp.getPublicKey() + "\"";
}
}
k8sControlNodeConfig = k8sControlNodeConfig.replace(installWaitTime, String.valueOf(waitTime));
k8sControlNodeConfig = k8sControlNodeConfig.replace(installReattemptsCount, String.valueOf(reattempts));
k8sControlNodeConfig = k8sControlNodeConfig.replace(sshPubKey, pubKey);
k8sControlNodeConfig = k8sControlNodeConfig.replace(clusterToken, KubernetesClusterUtil.generateClusterToken(kubernetesCluster));
String initArgs = "";
@ -244,6 +251,11 @@ public class KubernetesClusterStartWorker extends KubernetesClusterResourceModif
final String sshPubKey = "{{ k8s.ssh.pub.key }}";
final String clusterHACertificateKey = "{{ k8s_control_node.cluster.ha.certificate.key }}";
final String ejectIsoKey = "{{ k8s.eject.iso }}";
final String installWaitTime = "{{ k8s.install.wait.time }}";
final String installReattemptsCount = "{{ k8s.install.reattempts.count }}";
final Long waitTime = KubernetesClusterService.KubernetesControlNodeInstallAttemptWait.value();
final Long reattempts = KubernetesClusterService.KubernetesControlNodeInstallReattempts.value();
String pubKey = "- \"" + configurationDao.getValue("ssh.publickey") + "\"";
String sshKeyPair = kubernetesCluster.getKeyPair();
if (StringUtils.isNotEmpty(sshKeyPair)) {
@ -252,6 +264,8 @@ public class KubernetesClusterStartWorker extends KubernetesClusterResourceModif
pubKey += "\n - \"" + sshkp.getPublicKey() + "\"";
}
}
k8sControlNodeConfig = k8sControlNodeConfig.replace(installWaitTime, String.valueOf(waitTime));
k8sControlNodeConfig = k8sControlNodeConfig.replace(installReattemptsCount, String.valueOf(reattempts));
k8sControlNodeConfig = k8sControlNodeConfig.replace(sshPubKey, pubKey);
k8sControlNodeConfig = k8sControlNodeConfig.replace(joinIpKey, joinIp);
k8sControlNodeConfig = k8sControlNodeConfig.replace(clusterTokenKey, KubernetesClusterUtil.generateClusterToken(kubernetesCluster));

View File

@ -42,8 +42,14 @@ write_files:
ATTEMPT_ONLINE_INSTALL=false
setup_complete=false
OFFLINE_INSTALL_ATTEMPT_SLEEP=15
MAX_OFFLINE_INSTALL_ATTEMPTS=100
OFFLINE_INSTALL_ATTEMPT_SLEEP={{ k8s.install.wait.time }}
MAX_OFFLINE_INSTALL_ATTEMPTS={{ k8s.install.reattempts.count }}
if [[ -z $OFFLINE_INSTALL_ATTEMPT_SLEEP || $OFFLINE_INSTALL_ATTEMPT_SLEEP -eq 0 ]]; then
OFFLINE_INSTALL_ATTEMPT_SLEEP=15
fi
if [[ -z $MAX_OFFLINE_INSTALL_ATTEMPTS || $MAX_OFFLINE_INSTALL_ATTEMPTS -eq 0 ]]; then
MAX_OFFLINE_INSTALL_ATTEMPTS=100
fi
offline_attempts=1
MAX_SETUP_CRUCIAL_CMD_ATTEMPTS=3
EJECT_ISO_FROM_OS={{ k8s.eject.iso }}

View File

@ -62,8 +62,14 @@ write_files:
ATTEMPT_ONLINE_INSTALL=false
setup_complete=false
OFFLINE_INSTALL_ATTEMPT_SLEEP=15
MAX_OFFLINE_INSTALL_ATTEMPTS=100
OFFLINE_INSTALL_ATTEMPT_SLEEP={{ k8s.install.wait.time }}
MAX_OFFLINE_INSTALL_ATTEMPTS={{ k8s.install.reattempts.count }}
if [[ -z $OFFLINE_INSTALL_ATTEMPT_SLEEP || $OFFLINE_INSTALL_ATTEMPT_SLEEP -eq 0 ]]; then
OFFLINE_INSTALL_ATTEMPT_SLEEP=15
fi
if [[ -z $MAX_OFFLINE_INSTALL_ATTEMPTS || $MAX_OFFLINE_INSTALL_ATTEMPTS -eq 0 ]]; then
MAX_OFFLINE_INSTALL_ATTEMPTS=100
fi
offline_attempts=1
MAX_SETUP_CRUCIAL_CMD_ATTEMPTS=3
EJECT_ISO_FROM_OS={{ k8s.eject.iso }}

View File

@ -42,8 +42,14 @@ write_files:
ATTEMPT_ONLINE_INSTALL=false
setup_complete=false
OFFLINE_INSTALL_ATTEMPT_SLEEP=30
MAX_OFFLINE_INSTALL_ATTEMPTS=40
OFFLINE_INSTALL_ATTEMPT_SLEEP={{ k8s.install.wait.time }}
MAX_OFFLINE_INSTALL_ATTEMPTS={{ k8s.install.reattempts.count }}
if [[ -z $OFFLINE_INSTALL_ATTEMPT_SLEEP || $OFFLINE_INSTALL_ATTEMPT_SLEEP -eq 0 ]]; then
OFFLINE_INSTALL_ATTEMPT_SLEEP=30
fi
if [[ -z $MAX_OFFLINE_INSTALL_ATTEMPTS || $MAX_OFFLINE_INSTALL_ATTEMPTS -eq 0 ]]; then
MAX_OFFLINE_INSTALL_ATTEMPTS=40
fi
offline_attempts=1
MAX_SETUP_CRUCIAL_CMD_ATTEMPTS=3
EJECT_ISO_FROM_OS={{ k8s.eject.iso }}