Skip to content

Use the local kubelet's cluster domain, rather than a random one #1071

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Merged
merged 4 commits into from
Jul 3, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
13 changes: 12 additions & 1 deletion crates/stackable-operator/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@ All notable changes to this project will be documented in this file.

### Added

- The default Kubernetes cluster domain name is now fetched from the kubelet API unless explicitly configured ([#1068])
- The default Kubernetes cluster domain name is now fetched from the kubelet API unless explicitly configured ([#1068], [#1071])
This requires operators to have the RBAC permission to `get` `nodes/proxy` in the apiGroup "". An example RBAC rule could look like:

```yaml
Expand All @@ -21,6 +21,16 @@ All notable changes to this project will be documented in this file.
verbs: [get]
```

In addition, operators must be provided with the `KUBERNETES_NODE_NAME` environment variable, like this:

```yaml
env:
- name: KUBERNETES_NODE_NAME
valueFrom:
fieldRef:
fieldPath: spec.nodeName
```

### Changed

- Update `kube` to `1.1.0` ([#1049]).
Expand All @@ -41,6 +51,7 @@ All notable changes to this project will be documented in this file.
[#1060]: https://github.com/stackabletech/operator-rs/pull/1060
[#1064]: https://github.com/stackabletech/operator-rs/pull/1064
[#1068]: https://github.com/stackabletech/operator-rs/pull/1068
[#1071]: https://github.com/stackabletech/operator-rs/pull/1071

## [0.93.2] - 2025-05-26

Expand Down
42 changes: 34 additions & 8 deletions crates/stackable-operator/src/cli.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,7 +165,7 @@ pub enum Command<Run: Args = ProductOperatorRun> {
/// ```rust
/// # use stackable_operator::cli::{Command, ProductOperatorRun, ProductConfigPath};
/// use clap::Parser;
/// use stackable_operator::namespace::WatchNamespace;
/// use stackable_operator::{namespace::WatchNamespace, utils::cluster_info::KubernetesClusterInfoOpts};
/// use stackable_telemetry::tracing::TelemetryOptions;
///
/// #[derive(clap::Parser, Debug, PartialEq, Eq)]
Expand All @@ -176,14 +176,17 @@ pub enum Command<Run: Args = ProductOperatorRun> {
/// common: ProductOperatorRun,
/// }
///
/// let opts = Command::<Run>::parse_from(["foobar-operator", "run", "--name", "foo", "--product-config", "bar", "--watch-namespace", "foobar"]);
/// let opts = Command::<Run>::parse_from(["foobar-operator", "run", "--name", "foo", "--product-config", "bar", "--watch-namespace", "foobar", "--kubernetes-node-name", "baz"]);
/// assert_eq!(opts, Command::Run(Run {
/// name: "foo".to_string(),
/// common: ProductOperatorRun {
/// product_config: ProductConfigPath::from("bar".as_ref()),
/// watch_namespace: WatchNamespace::One("foobar".to_string()),
/// telemetry_arguments: TelemetryOptions::default(),
/// cluster_info_opts: Default::default(),
/// cluster_info_opts: KubernetesClusterInfoOpts {
/// kubernetes_cluster_domain: None,
/// kubernetes_node_name: "baz".to_string(),
/// },
/// },
/// }));
/// ```
Expand Down Expand Up @@ -388,38 +391,61 @@ mod tests {
"bar",
"--watch-namespace",
"foo",
"--kubernetes-node-name",
"baz",
]);
assert_eq!(
opts,
ProductOperatorRun {
product_config: ProductConfigPath::from("bar".as_ref()),
watch_namespace: WatchNamespace::One("foo".to_string()),
cluster_info_opts: Default::default(),
cluster_info_opts: KubernetesClusterInfoOpts {
kubernetes_cluster_domain: None,
kubernetes_node_name: "baz".to_string()
},
telemetry_arguments: Default::default(),
}
);

// no cli / no env
let opts = ProductOperatorRun::parse_from(["run", "--product-config", "bar"]);
let opts = ProductOperatorRun::parse_from([
"run",
"--product-config",
"bar",
"--kubernetes-node-name",
"baz",
]);
assert_eq!(
opts,
ProductOperatorRun {
product_config: ProductConfigPath::from("bar".as_ref()),
watch_namespace: WatchNamespace::All,
cluster_info_opts: Default::default(),
cluster_info_opts: KubernetesClusterInfoOpts {
kubernetes_cluster_domain: None,
kubernetes_node_name: "baz".to_string()
},
telemetry_arguments: Default::default(),
}
);

// env with namespace
unsafe { env::set_var(WATCH_NAMESPACE, "foo") };
let opts = ProductOperatorRun::parse_from(["run", "--product-config", "bar"]);
let opts = ProductOperatorRun::parse_from([
"run",
"--product-config",
"bar",
"--kubernetes-node-name",
"baz",
]);
assert_eq!(
opts,
ProductOperatorRun {
product_config: ProductConfigPath::from("bar".as_ref()),
watch_namespace: WatchNamespace::One("foo".to_string()),
cluster_info_opts: Default::default(),
cluster_info_opts: KubernetesClusterInfoOpts {
kubernetes_cluster_domain: None,
kubernetes_node_name: "baz".to_string()
},
telemetry_arguments: Default::default(),
}
);
Expand Down
32 changes: 26 additions & 6 deletions crates/stackable-operator/src/client.rs
Original file line number Diff line number Diff line change
Expand Up @@ -523,15 +523,19 @@ impl Client {
///
/// ```no_run
/// use std::time::Duration;
/// use clap::Parser;
/// use tokio::time::error::Elapsed;
/// use kube::runtime::watcher;
/// use k8s_openapi::api::core::v1::Pod;
/// use stackable_operator::client::{Client, initialize_operator};
/// use stackable_operator::{
/// client::{Client, initialize_operator},
/// utils::cluster_info::KubernetesClusterInfoOpts,
/// };
///
/// #[tokio::main]
/// async fn main() {
///
/// let client = initialize_operator(None, &Default::default())
/// let cluster_info_opts = KubernetesClusterInfoOpts::parse();
/// let client = initialize_operator(None, &cluster_info_opts)
/// .await
/// .expect("Unable to construct client.");
/// let watcher_config: watcher::Config =
Expand Down Expand Up @@ -683,10 +687,26 @@ mod tests {
};
use tokio::time::error::Elapsed;

use crate::utils::cluster_info::KubernetesClusterInfoOpts;

/// Builds the [`KubernetesClusterInfoOpts`] shared by the Kubernetes-dependent tests below.
async fn test_cluster_info_opts() -> KubernetesClusterInfoOpts {
    // The node name used below is invented, so it (probably) is not a Node that can be
    // queried. A made-up cluster domain is therefore hard-coded as well.
    let cluster_domain = "fake-cluster.local"
        .parse()
        .expect("hard-coded cluster domain must be valid");

    // There is no real Node behind these tests, so its name is made up too.
    let node_name = String::from("fake-node-name");

    KubernetesClusterInfoOpts {
        kubernetes_cluster_domain: Some(cluster_domain),
        kubernetes_node_name: node_name,
    }
}

#[tokio::test]
#[ignore = "Tests depending on Kubernetes are not ran by default"]
async fn k8s_test_wait_created() {
let client = super::initialize_operator(None, &Default::default())
let client = super::initialize_operator(None, &test_cluster_info_opts().await)
.await
.expect("KUBECONFIG variable must be configured.");

Expand Down Expand Up @@ -764,7 +784,7 @@ mod tests {
#[tokio::test]
#[ignore = "Tests depending on Kubernetes are not ran by default"]
async fn k8s_test_wait_created_timeout() {
let client = super::initialize_operator(None, &Default::default())
let client = super::initialize_operator(None, &test_cluster_info_opts().await)
.await
.expect("KUBECONFIG variable must be configured.");

Expand All @@ -784,7 +804,7 @@ mod tests {
#[tokio::test]
#[ignore = "Tests depending on Kubernetes are not ran by default"]
async fn k8s_test_list_with_label_selector() {
let client = super::initialize_operator(None, &Default::default())
let client = super::initialize_operator(None, &test_cluster_info_opts().await)
.await
.expect("KUBECONFIG variable must be configured.");

Expand Down
24 changes: 17 additions & 7 deletions crates/stackable-operator/src/utils/cluster_info.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,28 +15,38 @@ pub struct KubernetesClusterInfo {
pub cluster_domain: DomainName,
}

#[derive(clap::Parser, Debug, Default, PartialEq, Eq)]
#[derive(clap::Parser, Debug, PartialEq, Eq)]
// Options used to determine the Kubernetes cluster information. Every field is exposed both
// as a long command-line flag and as an environment variable (`#[arg(long, env)]`).
pub struct KubernetesClusterInfoOpts {
/// Kubernetes cluster domain, usually this is `cluster.local`.
// We are not using a default value here, as operators will probably do a more advanced
// auto-detection of the cluster domain in case it is not specified in the future.
// We are not using a default value here, as we query the cluster if it is not specified.
#[arg(long, env)]
pub kubernetes_cluster_domain: Option<DomainName>,

/// Name of the Kubernetes Node that the operator is running on.
// Not optional: this names the Node whose kubelet is queried for the cluster domain
// when `kubernetes_cluster_domain` is not set.
#[arg(long, env)]
pub kubernetes_node_name: String,
}

impl KubernetesClusterInfo {
pub async fn new(
client: &Client,
cluster_info_opts: &KubernetesClusterInfoOpts,
) -> Result<Self, Error> {
let cluster_domain = match &cluster_info_opts.kubernetes_cluster_domain {
Some(cluster_domain) => {
let cluster_domain = match cluster_info_opts {
KubernetesClusterInfoOpts {
kubernetes_cluster_domain: Some(cluster_domain),
..
} => {
tracing::info!(%cluster_domain, "Using configured Kubernetes cluster domain");

cluster_domain.clone()
}
None => {
let kubelet_config = kubelet::KubeletConfig::fetch(client)
KubernetesClusterInfoOpts {
kubernetes_node_name: node_name,
..
} => {
tracing::info!(%node_name, "Fetching Kubernetes cluster domain from the local kubelet");
let kubelet_config = kubelet::KubeletConfig::fetch(client, node_name)
.await
.context(KubeletConfigSnafu)?;

Expand Down
29 changes: 4 additions & 25 deletions crates/stackable-operator/src/utils/kubelet.rs
Original file line number Diff line number Diff line change
@@ -1,20 +1,12 @@
use http;
use k8s_openapi::api::core::v1::Node;
use kube::{
Api,
api::{ListParams, ResourceExt},
client::Client,
};
use kube::client::Client;
use serde::Deserialize;
use snafu::{OptionExt, ResultExt, Snafu};
use snafu::{ResultExt, Snafu};

use crate::commons::networking::DomainName;

#[derive(Debug, Snafu)]
pub enum Error {
#[snafu(display("failed to list nodes"))]
ListNodes { source: kube::Error },

#[snafu(display("failed to build request for url path \"{url_path}\""))]
BuildConfigzRequest {
source: http::Error,
Expand All @@ -29,11 +21,6 @@ pub enum Error {

#[snafu(display("failed to deserialize kubelet config JSON"))]
KubeletConfigJson { source: serde_json::Error },

#[snafu(display(
"empty Kubernetes nodes list. At least one node is required to fetch the cluster domain from the kubelet config"
))]
EmptyKubernetesNodesList,
}

#[derive(Debug, Deserialize)]
Expand All @@ -49,16 +36,8 @@ pub struct KubeletConfig {
}

impl KubeletConfig {
/// Fetches the kubelet configuration from the "first" node in the Kubernetes cluster.
pub async fn fetch(client: &Client) -> Result<Self, Error> {
let api: Api<Node> = Api::all(client.clone());
let nodes = api
.list(&ListParams::default())
.await
.context(ListNodesSnafu)?;
let node = nodes.iter().next().context(EmptyKubernetesNodesListSnafu)?;
let node_name = node.name_any();

/// Fetches the kubelet configuration from the specified node in the Kubernetes cluster.
pub async fn fetch(client: &Client, node_name: &str) -> Result<Self, Error> {
let url_path = format!("/api/v1/nodes/{node_name}/proxy/configz");
let req = http::Request::get(url_path.clone())
.body(Default::default())
Expand Down