From 535219897df1e5912fa2fb56aee0c63e36958552 Mon Sep 17 00:00:00 2001 From: Pearl Dsilva Date: Wed, 10 Apr 2024 10:06:40 -0400 Subject: [PATCH] CKS: Externalize control and worker node setup wait time and installation attempts --- .../cluster/KubernetesClusterManagerImpl.java | 6 +- .../cluster/KubernetesClusterService.java | 24 ++++++ ...esClusterResourceModifierActionWorker.java | 78 +++++++++++++++++++ .../KubernetesClusterStartWorker.java | 14 ++++ .../resources/conf/k8s-control-node-add.yml | 10 ++- .../main/resources/conf/k8s-control-node.yml | 10 ++- .../src/main/resources/conf/k8s-node.yml | 10 ++- 7 files changed, 145 insertions(+), 7 deletions(-) diff --git a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterManagerImpl.java b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterManagerImpl.java index 6d4b013298a..53a6a3670e5 100644 --- a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterManagerImpl.java +++ b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterManagerImpl.java @@ -2288,7 +2288,11 @@ public class KubernetesClusterManagerImpl extends ManagerBase implements Kuberne KubernetesClusterUpgradeRetries, KubernetesClusterAddNodeTimeout, KubernetesClusterExperimentalFeaturesEnabled, - KubernetesMaxClusterSize + KubernetesMaxClusterSize, + KubernetesControlNodeInstallAttemptWait, + KubernetesControlNodeInstallReattempts, + KubernetesWorkerNodeInstallAttemptWait, + KubernetesWorkerNodeInstallReattempts }; } } diff --git a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterService.java b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterService.java index 9512a0563cb..fc47cc0943a 100644 --- 
a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterService.java +++ b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/KubernetesClusterService.java @@ -105,6 +105,30 @@ public interface KubernetesClusterService extends PluggableService, Configurable true, ConfigKey.Scope.Account, KubernetesServiceEnabled.key()); + static final ConfigKey<Long> KubernetesControlNodeInstallAttemptWait = new ConfigKey<>("Advanced", Long.class, + "cloud.kubernetes.control.node.install.attempt.wait.duration", + "15", + "Time in seconds for the installation process to wait before it re-attempts", + true, + KubernetesServiceEnabled.key()); + static final ConfigKey<Long> KubernetesControlNodeInstallReattempts = new ConfigKey<>("Advanced", Long.class, + "cloud.kubernetes.control.node.install.reattempt.count", + "100", + "Number of times the offline installation of K8S will be re-attempted", + true, + KubernetesServiceEnabled.key()); + static final ConfigKey<Long> KubernetesWorkerNodeInstallAttemptWait = new ConfigKey<>("Advanced", Long.class, + "cloud.kubernetes.worker.node.install.attempt.wait.duration", + "30", + "Time in seconds for the installation process to wait before it re-attempts", + true, + KubernetesServiceEnabled.key()); + static final ConfigKey<Long> KubernetesWorkerNodeInstallReattempts = new ConfigKey<>("Advanced", Long.class, + "cloud.kubernetes.worker.node.install.reattempt.count", + "40", + "Number of times the offline installation of K8S will be re-attempted", + true, + KubernetesServiceEnabled.key()); KubernetesCluster findById(final Long id); diff --git a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/actionworkers/KubernetesClusterResourceModifierActionWorker.java b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/actionworkers/KubernetesClusterResourceModifierActionWorker.java index fd40efad9e3..df5a58f7bdf 100644 --- 
a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/actionworkers/KubernetesClusterResourceModifierActionWorker.java +++ b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/actionworkers/KubernetesClusterResourceModifierActionWorker.java @@ -36,14 +36,19 @@ import java.util.stream.Collectors; import javax.inject.Inject; +import com.cloud.kubernetes.cluster.KubernetesClusterDetailsVO; import com.cloud.kubernetes.cluster.KubernetesClusterHelper.KubernetesClusterNodeType; +import com.cloud.kubernetes.cluster.KubernetesClusterService; +import com.cloud.kubernetes.cluster.utils.KubernetesClusterUtil; import com.cloud.network.rules.FirewallManager; import com.cloud.network.rules.RulesService; import com.cloud.network.rules.dao.PortForwardingRulesDao; import com.cloud.offering.NetworkOffering; import com.cloud.offerings.dao.NetworkOfferingDao; +import com.cloud.user.SSHKeyPairVO; import com.cloud.utils.db.TransactionCallbackWithException; import com.cloud.utils.net.Ip; +import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.BaseCmd; import org.apache.cloudstack.api.command.user.firewall.CreateFirewallRuleCmd; import org.apache.cloudstack.api.command.user.network.CreateNetworkACLCmd; @@ -166,6 +171,79 @@ public class KubernetesClusterResourceModifierActionWorker extends KubernetesClu kubernetesClusterNodeNamePrefix = getKubernetesClusterNodeNamePrefix(); } + private String getKubernetesNodeConfig(final String joinIp, final boolean ejectIso) throws IOException { + String k8sNodeConfig = readResourceFile("/conf/k8s-node.yml"); + final String sshPubKey = "{{ k8s.ssh.pub.key }}"; + final String joinIpKey = "{{ k8s_control_node.join_ip }}"; + final String clusterTokenKey = "{{ k8s_control_node.cluster.token }}"; + final String ejectIsoKey = "{{ k8s.eject.iso }}"; + final String installWaitTime = "{{ k8s.install.wait.time }}"; + final String installReattemptsCount = "{{ 
k8s.install.reattempts.count }}"; + + final Long waitTime = KubernetesClusterService.KubernetesWorkerNodeInstallAttemptWait.value(); + final Long reattempts = KubernetesClusterService.KubernetesWorkerNodeInstallReattempts.value(); + String pubKey = "- \"" + configurationDao.getValue("ssh.publickey") + "\""; + String sshKeyPair = kubernetesCluster.getKeyPair(); + if (StringUtils.isNotEmpty(sshKeyPair)) { + SSHKeyPairVO sshkp = sshKeyPairDao.findByName(owner.getAccountId(), owner.getDomainId(), sshKeyPair); + if (sshkp != null) { + pubKey += "\n - \"" + sshkp.getPublicKey() + "\""; + } + } + k8sNodeConfig = k8sNodeConfig.replace(sshPubKey, pubKey); + k8sNodeConfig = k8sNodeConfig.replace(joinIpKey, joinIp); + k8sNodeConfig = k8sNodeConfig.replace(clusterTokenKey, KubernetesClusterUtil.generateClusterToken(kubernetesCluster)); + k8sNodeConfig = k8sNodeConfig.replace(ejectIsoKey, String.valueOf(ejectIso)); + k8sNodeConfig = k8sNodeConfig.replace(installWaitTime, String.valueOf(waitTime)); + k8sNodeConfig = k8sNodeConfig.replace(installReattemptsCount, String.valueOf(reattempts)); + k8sNodeConfig = updateKubeConfigWithRegistryDetails(k8sNodeConfig); + + return k8sNodeConfig; + } + + protected String updateKubeConfigWithRegistryDetails(String k8sConfig) { + /* generate /etc/containerd/config.toml file on the nodes only if Kubernetes cluster is created to + * use docker private registry */ + String registryUsername = null; + String registryPassword = null; + String registryUrl = null; + + List<KubernetesClusterDetailsVO> details = kubernetesClusterDetailsDao.listDetails(kubernetesCluster.getId()); + for (KubernetesClusterDetailsVO detail : details) { + if (detail.getName().equals(ApiConstants.DOCKER_REGISTRY_USER_NAME)) { + registryUsername = detail.getValue(); + } + if (detail.getName().equals(ApiConstants.DOCKER_REGISTRY_PASSWORD)) { + registryPassword = detail.getValue(); + } + if (detail.getName().equals(ApiConstants.DOCKER_REGISTRY_URL)) { + registryUrl = detail.getValue(); + } + } + + if 
(StringUtils.isNoneEmpty(registryUsername, registryPassword, registryUrl)) { + // Update runcmd in the cloud-init configuration to run a script that updates the containerd config with provided registry details + String runCmd = "- bash -x /opt/bin/setup-containerd"; + + String registryEp = registryUrl.contains("://") ? registryUrl.substring(registryUrl.indexOf("://") + 3) : registryUrl; /* a URL stored without a scheme is used as the endpoint as-is instead of failing on a missing split element */ + k8sConfig = k8sConfig.replace("- containerd config default > /etc/containerd/config.toml", runCmd); + final String registryUrlKey = "{{registry.url}}"; + final String registryUrlEpKey = "{{registry.url.endpoint}}"; + final String registryAuthKey = "{{registry.token}}"; + final String registryUname = "{{registry.username}}"; + final String registryPsswd = "{{registry.password}}"; + + final String usernamePasswordKey = registryUsername + ":" + registryPassword; + String base64Auth = Base64.encodeBase64String(usernamePasswordKey.getBytes(com.cloud.utils.StringUtils.getPreferredCharset())); + k8sConfig = k8sConfig.replace(registryUrlKey, registryUrl); + k8sConfig = k8sConfig.replace(registryUrlEpKey, registryEp); + k8sConfig = k8sConfig.replace(registryUname, registryUsername); + k8sConfig = k8sConfig.replace(registryPsswd, registryPassword); + k8sConfig = k8sConfig.replace(registryAuthKey, base64Auth); + } + return k8sConfig; + } + protected DeployDestination plan(final long nodesCount, final DataCenter zone, final ServiceOffering offering) throws InsufficientServerCapacityException { final int cpu_requested = offering.getCpu() * offering.getSpeed(); final long ram_requested = offering.getRamSize() * 1024L * 1024L; diff --git a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/actionworkers/KubernetesClusterStartWorker.java b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/actionworkers/KubernetesClusterStartWorker.java index 97bd51fb780..8a4685251d2 100644 --- 
a/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/actionworkers/KubernetesClusterStartWorker.java +++ b/plugins/integrations/kubernetes-service/src/main/java/com/cloud/kubernetes/cluster/actionworkers/KubernetesClusterStartWorker.java @@ -141,6 +141,9 @@ public class KubernetesClusterStartWorker extends KubernetesClusterResourceModif final String clusterToken = "{{ k8s_control_node.cluster.token }}"; final String clusterInitArgsKey = "{{ k8s_control_node.cluster.initargs }}"; final String ejectIsoKey = "{{ k8s.eject.iso }}"; + final String installWaitTime = "{{ k8s.install.wait.time }}"; + final String installReattemptsCount = "{{ k8s.install.reattempts.count }}"; + final List addresses = new ArrayList<>(); addresses.add(controlNodeIp); if (!serverIp.equals(controlNodeIp)) { @@ -152,6 +155,8 @@ public class KubernetesClusterStartWorker extends KubernetesClusterResourceModif final String tlsClientCert = CertUtils.x509CertificateToPem(certificate.getClientCertificate()); final String tlsPrivateKey = CertUtils.privateKeyToPem(certificate.getPrivateKey()); final String tlsCaCert = CertUtils.x509CertificatesToPem(certificate.getCaCertificates()); + final Long waitTime = KubernetesClusterService.KubernetesControlNodeInstallAttemptWait.value(); + final Long reattempts = KubernetesClusterService.KubernetesControlNodeInstallReattempts.value(); k8sControlNodeConfig = k8sControlNodeConfig.replace(apiServerCert, tlsClientCert.replace("\n", "\n ")); k8sControlNodeConfig = k8sControlNodeConfig.replace(apiServerKey, tlsPrivateKey.replace("\n", "\n ")); k8sControlNodeConfig = k8sControlNodeConfig.replace(caCert, tlsCaCert.replace("\n", "\n ")); @@ -163,6 +168,8 @@ public class KubernetesClusterStartWorker extends KubernetesClusterResourceModif pubKey += "\n - \"" + sshkp.getPublicKey() + "\""; } } + k8sControlNodeConfig = k8sControlNodeConfig.replace(installWaitTime, String.valueOf(waitTime)); + k8sControlNodeConfig = 
k8sControlNodeConfig.replace(installReattemptsCount, String.valueOf(reattempts)); k8sControlNodeConfig = k8sControlNodeConfig.replace(sshPubKey, pubKey); k8sControlNodeConfig = k8sControlNodeConfig.replace(clusterToken, KubernetesClusterUtil.generateClusterToken(kubernetesCluster)); String initArgs = ""; @@ -244,6 +251,11 @@ public class KubernetesClusterStartWorker extends KubernetesClusterResourceModif final String sshPubKey = "{{ k8s.ssh.pub.key }}"; final String clusterHACertificateKey = "{{ k8s_control_node.cluster.ha.certificate.key }}"; final String ejectIsoKey = "{{ k8s.eject.iso }}"; + final String installWaitTime = "{{ k8s.install.wait.time }}"; + final String installReattemptsCount = "{{ k8s.install.reattempts.count }}"; + + final Long waitTime = KubernetesClusterService.KubernetesControlNodeInstallAttemptWait.value(); + final Long reattempts = KubernetesClusterService.KubernetesControlNodeInstallReattempts.value(); String pubKey = "- \"" + configurationDao.getValue("ssh.publickey") + "\""; String sshKeyPair = kubernetesCluster.getKeyPair(); if (StringUtils.isNotEmpty(sshKeyPair)) { @@ -252,6 +264,8 @@ public class KubernetesClusterStartWorker extends KubernetesClusterResourceModif pubKey += "\n - \"" + sshkp.getPublicKey() + "\""; } } + k8sControlNodeConfig = k8sControlNodeConfig.replace(installWaitTime, String.valueOf(waitTime)); + k8sControlNodeConfig = k8sControlNodeConfig.replace(installReattemptsCount, String.valueOf(reattempts)); k8sControlNodeConfig = k8sControlNodeConfig.replace(sshPubKey, pubKey); k8sControlNodeConfig = k8sControlNodeConfig.replace(joinIpKey, joinIp); k8sControlNodeConfig = k8sControlNodeConfig.replace(clusterTokenKey, KubernetesClusterUtil.generateClusterToken(kubernetesCluster)); diff --git a/plugins/integrations/kubernetes-service/src/main/resources/conf/k8s-control-node-add.yml b/plugins/integrations/kubernetes-service/src/main/resources/conf/k8s-control-node-add.yml index 2c18efa0189..35134bd1e16 100644 --- 
a/plugins/integrations/kubernetes-service/src/main/resources/conf/k8s-control-node-add.yml +++ b/plugins/integrations/kubernetes-service/src/main/resources/conf/k8s-control-node-add.yml @@ -42,8 +42,14 @@ write_files: ATTEMPT_ONLINE_INSTALL=false setup_complete=false - OFFLINE_INSTALL_ATTEMPT_SLEEP=15 - MAX_OFFLINE_INSTALL_ATTEMPTS=100 + OFFLINE_INSTALL_ATTEMPT_SLEEP={{ k8s.install.wait.time }} + MAX_OFFLINE_INSTALL_ATTEMPTS={{ k8s.install.reattempts.count }} + if [[ -z $OFFLINE_INSTALL_ATTEMPT_SLEEP || $OFFLINE_INSTALL_ATTEMPT_SLEEP -eq 0 ]]; then + OFFLINE_INSTALL_ATTEMPT_SLEEP=15 + fi + if [[ -z $MAX_OFFLINE_INSTALL_ATTEMPTS || $MAX_OFFLINE_INSTALL_ATTEMPTS -eq 0 ]]; then + MAX_OFFLINE_INSTALL_ATTEMPTS=100 + fi offline_attempts=1 MAX_SETUP_CRUCIAL_CMD_ATTEMPTS=3 EJECT_ISO_FROM_OS={{ k8s.eject.iso }} diff --git a/plugins/integrations/kubernetes-service/src/main/resources/conf/k8s-control-node.yml b/plugins/integrations/kubernetes-service/src/main/resources/conf/k8s-control-node.yml index aa7eec97ac8..3154fb20251 100644 --- a/plugins/integrations/kubernetes-service/src/main/resources/conf/k8s-control-node.yml +++ b/plugins/integrations/kubernetes-service/src/main/resources/conf/k8s-control-node.yml @@ -62,8 +62,14 @@ write_files: ATTEMPT_ONLINE_INSTALL=false setup_complete=false - OFFLINE_INSTALL_ATTEMPT_SLEEP=15 - MAX_OFFLINE_INSTALL_ATTEMPTS=100 + OFFLINE_INSTALL_ATTEMPT_SLEEP={{ k8s.install.wait.time }} + MAX_OFFLINE_INSTALL_ATTEMPTS={{ k8s.install.reattempts.count }} + if [[ -z $OFFLINE_INSTALL_ATTEMPT_SLEEP || $OFFLINE_INSTALL_ATTEMPT_SLEEP -eq 0 ]]; then + OFFLINE_INSTALL_ATTEMPT_SLEEP=15 + fi + if [[ -z $MAX_OFFLINE_INSTALL_ATTEMPTS || $MAX_OFFLINE_INSTALL_ATTEMPTS -eq 0 ]]; then + MAX_OFFLINE_INSTALL_ATTEMPTS=100 + fi offline_attempts=1 MAX_SETUP_CRUCIAL_CMD_ATTEMPTS=3 EJECT_ISO_FROM_OS={{ k8s.eject.iso }} diff --git a/plugins/integrations/kubernetes-service/src/main/resources/conf/k8s-node.yml 
b/plugins/integrations/kubernetes-service/src/main/resources/conf/k8s-node.yml index 4d34c2f7f1d..c3db5da7921 100644 --- a/plugins/integrations/kubernetes-service/src/main/resources/conf/k8s-node.yml +++ b/plugins/integrations/kubernetes-service/src/main/resources/conf/k8s-node.yml @@ -42,8 +42,14 @@ write_files: ATTEMPT_ONLINE_INSTALL=false setup_complete=false - OFFLINE_INSTALL_ATTEMPT_SLEEP=30 - MAX_OFFLINE_INSTALL_ATTEMPTS=40 + OFFLINE_INSTALL_ATTEMPT_SLEEP={{ k8s.install.wait.time }} + MAX_OFFLINE_INSTALL_ATTEMPTS={{ k8s.install.reattempts.count }} + if [[ -z $OFFLINE_INSTALL_ATTEMPT_SLEEP || $OFFLINE_INSTALL_ATTEMPT_SLEEP -eq 0 ]]; then + OFFLINE_INSTALL_ATTEMPT_SLEEP=30 + fi + if [[ -z $MAX_OFFLINE_INSTALL_ATTEMPTS || $MAX_OFFLINE_INSTALL_ATTEMPTS -eq 0 ]]; then + MAX_OFFLINE_INSTALL_ATTEMPTS=40 + fi offline_attempts=1 MAX_SETUP_CRUCIAL_CMD_ATTEMPTS=3 EJECT_ISO_FROM_OS={{ k8s.eject.iso }}