Skip to content

feat: automatic cluster detection #1068

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 10 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

1 change: 1 addition & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ educe = { version = "0.6.0", default-features = false, features = ["Clone", "De
either = "1.13.0"
futures = "0.3.30"
futures-util = "0.3.30"
http = "1.3.1"
indexmap = "2.5.0"
indoc = "2.0.6"
insta = { version= "1.40", features = ["glob"] }
Expand Down
5 changes: 5 additions & 0 deletions crates/stackable-operator/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,10 @@ All notable changes to this project will be documented in this file.

## [Unreleased]

### Added

- The default Kubernetes cluster domain name is now fetched from the kubelet API unless explicitly configured ([#1068]).

### Changed

- Update `kube` to `1.1.0` ([#1049]).
Expand All @@ -23,6 +27,7 @@ All notable changes to this project will be documented in this file.
[#1058]: https://github.com/stackabletech/operator-rs/pull/1058
[#1060]: https://github.com/stackabletech/operator-rs/pull/1060
[#1064]: https://github.com/stackabletech/operator-rs/pull/1064
[#1068]: https://github.com/stackabletech/operator-rs/pull/1068

## [0.93.2] - 2025-05-26

Expand Down
1 change: 1 addition & 0 deletions crates/stackable-operator/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@ dockerfile-parser.workspace = true
either.workspace = true
educe.workspace = true
futures.workspace = true
http.workspace = true
indexmap.workspace = true
json-patch.workspace = true
k8s-openapi.workspace = true
Expand Down
9 changes: 8 additions & 1 deletion crates/stackable-operator/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -84,6 +84,11 @@ pub enum Error {

#[snafu(display("unable to create kubernetes client"))]
CreateKubeClient { source: kube::Error },

#[snafu(display("unable to fetch cluster information from kubelet"))]
NewKubeletClusterInfo {
source: crate::utils::cluster_info::Error,
},
}

/// This `Client` can be used to access Kubernetes.
Expand Down Expand Up @@ -651,7 +656,9 @@ pub async fn initialize_operator(
.context(InferKubeConfigSnafu)?;
let default_namespace = kubeconfig.default_namespace.clone();
let client = kube::Client::try_from(kubeconfig).context(CreateKubeClientSnafu)?;
let cluster_info = KubernetesClusterInfo::new(cluster_info_opts);
let cluster_info = KubernetesClusterInfo::new(&client, cluster_info_opts)
.await
.context(NewKubeletClusterInfoSnafu)?;

Ok(Client::new(
client,
Expand Down
31 changes: 19 additions & 12 deletions crates/stackable-operator/src/utils/cluster_info.rs
Original file line number Diff line number Diff line change
@@ -1,10 +1,14 @@
use std::str::FromStr;
use kube::Client;
use snafu::{ResultExt, Snafu};

use crate::commons::networking::DomainName;
use crate::{commons::networking::DomainName, utils::kubelet};

const KUBERNETES_CLUSTER_DOMAIN_DEFAULT: &str = "cluster.local";
#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("unable to fetch kubelet config"))]
KubeletConfig { source: kubelet::Error },
}

/// Some information that we know about the Kubernetes cluster.
#[derive(Debug, Clone)]
pub struct KubernetesClusterInfo {
/// The Kubernetes cluster domain, typically `cluster.local`.
Expand All @@ -21,25 +25,28 @@ pub struct KubernetesClusterInfoOpts {
}

impl KubernetesClusterInfo {
pub fn new(cluster_info_opts: &KubernetesClusterInfoOpts) -> Self {
pub async fn new(
client: &Client,
cluster_info_opts: &KubernetesClusterInfoOpts,
) -> Result<Self, Error> {
let cluster_domain = match &cluster_info_opts.kubernetes_cluster_domain {
Some(cluster_domain) => {
tracing::info!(%cluster_domain, "Using configured Kubernetes cluster domain");

cluster_domain.clone()
}
None => {
// TODO(sbernauer): Do some sort of advanced auto-detection, see https://github.com/stackabletech/issues/issues/436.
// There have been attempts of parsing the `/etc/resolv.conf`, but they have been
// reverted. Please read on the linked issue for details.
let cluster_domain = DomainName::from_str(KUBERNETES_CLUSTER_DOMAIN_DEFAULT)
.expect("KUBERNETES_CLUSTER_DOMAIN_DEFAULT constant must a valid domain");
tracing::info!(%cluster_domain, "Defaulting Kubernetes cluster domain as it has not been configured");
let kubelet_config = kubelet::KubeletConfig::fetch(client)
.await
.context(KubeletConfigSnafu)?;

let cluster_domain = kubelet_config.cluster_domain;
tracing::info!(%cluster_domain, "Using Kubernetes cluster domain from the kubelet config");

cluster_domain
}
};

Self { cluster_domain }
Ok(Self { cluster_domain })
}
}
74 changes: 74 additions & 0 deletions crates/stackable-operator/src/utils/kubelet.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
use http;
use k8s_openapi::api::core::v1::Node;
use kube::{
Api,
api::{ListParams, ResourceExt},
client::Client,
};
use serde::Deserialize;
use snafu::{OptionExt, ResultExt, Snafu};

use crate::commons::networking::DomainName;

#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("failed to list nodes"))]
ListNodes { source: kube::Error },

#[snafu(display("failed to build request for url path \"{url_path}\""))]
BuildConfigzRequest {
source: http::Error,
url_path: String,
},

#[snafu(display("failed to fetch kubelet config from node {node:?}"))]
FetchNodeKubeletConfig { source: kube::Error, node: String },

#[snafu(display("failed to fetch `kubeletconfig` JSON key from configz response"))]
KubeletConfigJsonKey,

#[snafu(display("failed to deserialize kubelet config JSON"))]
KubeletConfigJson { source: serde_json::Error },

#[snafu(display(
"empty Kubernetes nodes list. At least one node is required to fetch the cluster domain from the kubelet config"
))]
EmptyKubernetesNodesList,
}

/// Envelope of the response returned by the node `configz` proxy endpoint.
///
/// Only the `kubeletconfig` key is declared here.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
struct ProxyConfigResponse {
    /// The kubelet configuration object stored under the `kubeletconfig` key.
    kubeletconfig: KubeletConfig,
}

/// The part of a kubelet's configuration that this crate deserializes.
#[derive(Debug, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct KubeletConfig {
    /// The cluster domain, deserialized from the `clusterDomain` JSON key
    /// (the field name under the `camelCase` rename rule).
    pub cluster_domain: DomainName,
}

impl KubeletConfig {
    /// Fetches the kubelet configuration from the "first" node in the Kubernetes cluster.
    ///
    /// The first node returned by a cluster-wide node list is selected, and its
    /// configuration is requested at `/api/v1/nodes/{node_name}/proxy/configz`.
    /// The `kubeletconfig` object of that response is returned.
    ///
    /// # Errors
    ///
    /// - [`Error::ListNodes`] if listing the nodes fails.
    /// - [`Error::EmptyKubernetesNodesList`] if the list contains no node.
    /// - [`Error::BuildConfigzRequest`] if the HTTP request cannot be built.
    /// - [`Error::FetchNodeKubeletConfig`] if the `configz` request fails.
    pub async fn fetch(client: &Client) -> Result<Self, Error> {
        let api: Api<Node> = Api::all(client.clone());

        // Only one node is ever used, so cap the list request at a single item
        // instead of transferring every `Node` object of the cluster.
        let list_params = ListParams::default().limit(1);
        let nodes = api.list(&list_params).await.context(ListNodesSnafu)?;

        let node = nodes.iter().next().context(EmptyKubernetesNodesListSnafu)?;
        let node_name = node.name_any();

        let url_path = format!("/api/v1/nodes/{node_name}/proxy/configz");
        // The URI is parsed from a borrowed string, so `url_path` can be moved
        // into the error context afterwards without cloning it.
        let req = http::Request::get(url_path.as_str())
            .body(Default::default())
            .context(BuildConfigzRequestSnafu { url_path })?;

        let resp = client
            .request::<ProxyConfigResponse>(req)
            .await
            .context(FetchNodeKubeletConfigSnafu { node: node_name })?;

        Ok(resp.kubeletconfig)
    }
}
1 change: 1 addition & 0 deletions crates/stackable-operator/src/utils/mod.rs
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
pub mod bash;
pub mod cluster_info;
pub mod crds;
pub mod kubelet;
pub mod logging;
mod option;
mod url;
Expand Down