From 42b93c0bc2ce30b5f11004e9ed972e9d11476c97 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Thu, 16 Oct 2025 15:59:27 +0200 Subject: [PATCH 1/6] remove headless & metrics methods, use op-rs methods --- rust/operator-binary/src/crd/mod.rs | 6 ++-- rust/operator-binary/src/discovery.rs | 12 -------- rust/operator-binary/src/product_logging.rs | 15 ---------- rust/operator-binary/src/zk_controller.rs | 33 +++------------------ 4 files changed, 6 insertions(+), 60 deletions(-) diff --git a/rust/operator-binary/src/crd/mod.rs b/rust/operator-binary/src/crd/mod.rs index 58d3ab1d..bf03f7c4 100644 --- a/rust/operator-binary/src/crd/mod.rs +++ b/rust/operator-binary/src/crd/mod.rs @@ -36,7 +36,6 @@ use strum::{Display, EnumIter, EnumString, IntoEnumIterator}; use crate::{ crd::{affinity::get_affinity, v1alpha1::ZookeeperServerRoleConfig}, - discovery::build_role_group_headless_service_name, listener::role_listener_name, }; @@ -626,9 +625,8 @@ impl v1alpha1::ZookeeperCluster { for i in 0..rolegroup.replicas.unwrap_or(1) { pod_refs.push(ZookeeperPodRef { namespace: ns.clone(), - role_group_headless_service_name: build_role_group_headless_service_name( - rolegroup_ref.object_name(), - ), + role_group_headless_service_name: rolegroup_ref + .rolegroup_headless_service_name(), pod_name: format!("{role_group}-{i}", role_group = rolegroup_ref.object_name()), zookeeper_myid: i + myid_offset, }); diff --git a/rust/operator-binary/src/discovery.rs b/rust/operator-binary/src/discovery.rs index 3ed0012f..3e0f0631 100644 --- a/rust/operator-binary/src/discovery.rs +++ b/rust/operator-binary/src/discovery.rs @@ -168,15 +168,3 @@ fn listener_addresses( false => Ok(address_port_pairs), } } - -// TODO (@NickLarsenNZ): Implement this directly on RoleGroupRef, ie: -// RoleGroupRef::metrics_service_name(&self) to restrict what _name_ can be. -pub fn build_role_group_headless_service_name(name: String) -> String { - format!("{name}-headless") -} - -// TODO (@NickLarsenNZ): Implement this directly on RoleGroupRef, ie: -// RoleGroupRef::metrics_service_name(&self) to restrict what _name_ can be. -pub fn build_role_group_metrics_service_name(name: String) -> String { - format!("{name}-metrics") -} diff --git a/rust/operator-binary/src/product_logging.rs b/rust/operator-binary/src/product_logging.rs index 8941965f..a9e54346 100644 --- a/rust/operator-binary/src/product_logging.rs +++ b/rust/operator-binary/src/product_logging.rs @@ -16,21 +16,6 @@ use crate::crd::{ #[derive(Snafu, Debug)] pub enum Error { - #[snafu(display("object has no namespace"))] - ObjectHasNoNamespace, - - #[snafu(display("failed to retrieve the ConfigMap {cm_name}"))] - ConfigMapNotFound { - source: stackable_operator::client::Error, - cm_name: String, - }, - - #[snafu(display("failed to retrieve the entry {entry} for ConfigMap {cm_name}"))] - MissingConfigMapEntry { - entry: &'static str, - cm_name: String, - }, - #[snafu(display("crd validation failure"))] CrdValidationFailure { source: crate::crd::Error }, } diff --git a/rust/operator-binary/src/zk_controller.rs b/rust/operator-binary/src/zk_controller.rs index 713c4c90..f30ad2fc 100644 --- a/rust/operator-binary/src/zk_controller.rs +++ b/rust/operator-binary/src/zk_controller.rs @@ -90,10 +90,7 @@ use crate::{ security::{self, ZookeeperSecurity}, v1alpha1::{self, ZookeeperServerRoleConfig}, }, - discovery::{ - self, build_discovery_configmap, build_role_group_headless_service_name, - build_role_group_metrics_service_name, - }, + discovery::{self, build_discovery_configmap}, listener::{build_role_listener, role_listener_name}, operations::{graceful_shutdown::add_graceful_shutdown_config, pdb::add_pdbs}, product_logging::extend_role_group_config_map, @@ -139,19 +136,6 @@ pub enum Error { #[snafu(display("internal operator failure"))] InternalOperatorFailure { source: crate::crd::Error }, - #[snafu(display("failed to calculate global service name"))] - GlobalServiceNameNotFound, - - #[snafu(display("failed to calculate service name for role {}", rolegroup))] - RoleGroupServiceNameNotFound { - rolegroup: RoleGroupRef, - }, - - #[snafu(display("failed to apply global Service"))] - ApplyRoleService { - source: stackable_operator::cluster_resources::Error, - }, - #[snafu(display("failed to apply Service for {}", rolegroup))] ApplyRoleGroupService { source: stackable_operator::cluster_resources::Error, @@ -319,9 +303,6 @@ impl ReconcilerError for Error { Error::NoServerRole => None, Error::RoleParseFailure { .. } => None, Error::InternalOperatorFailure { .. } => None, - Error::GlobalServiceNameNotFound => None, - Error::RoleGroupServiceNameNotFound { .. } => None, - Error::ApplyRoleService { .. } => None, Error::ApplyRoleGroupService { .. } => None, Error::BuildRoleGroupConfig { .. } => None, Error::ApplyRoleGroupConfig { .. } => None, @@ -682,9 +663,7 @@ fn build_server_rolegroup_headless_service( ) -> Result { let metadata = ObjectMetaBuilder::new() .name_and_namespace(zk) - .name(build_role_group_headless_service_name( - rolegroup.object_name(), - )) + .name(rolegroup.rolegroup_headless_service_name()) .ownerreference_from_resource(zk, None, Some(true)) .context(ObjectMissingMetadataForOwnerRefSnafu)? .with_recommended_labels(build_recommended_labels( @@ -743,9 +722,7 @@ fn build_server_rolegroup_metrics_service( let metadata = ObjectMetaBuilder::new() .name_and_namespace(zk) - .name(build_role_group_metrics_service_name( - rolegroup.object_name(), - )) + .name(rolegroup.rolegroup_metrics_service_name()) .ownerreference_from_resource(zk, None, Some(true)) .context(ObjectMissingMetadataForOwnerRefSnafu)? .with_recommended_labels(build_recommended_labels( @@ -1161,9 +1138,7 @@ fn build_server_rolegroup_statefulset( match_labels: Some(statefulset_match_labels.into()), ..LabelSelector::default() }, - service_name: Some(build_role_group_headless_service_name( - rolegroup_ref.object_name(), - )), + service_name: Some(rolegroup_ref.rolegroup_headless_service_name()), template: pod_template, volume_claim_templates: Some(pvcs), ..StatefulSetSpec::default() From 4d854865aee4f932e08c0c49f94e8d383f02d8ba Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 20 Oct 2025 09:00:40 +0200 Subject: [PATCH 2/6] add prometheus annotations to metrics service --- rust/operator-binary/src/main.rs | 1 + rust/operator-binary/src/service.rs | 203 ++++++++++++++++++++++ rust/operator-binary/src/zk_controller.rs | 172 +++--------------- 3 files changed, 225 insertions(+), 151 deletions(-) create mode 100644 rust/operator-binary/src/service.rs diff --git a/rust/operator-binary/src/main.rs b/rust/operator-binary/src/main.rs index 3ae680fb..ebf35911 100644 --- a/rust/operator-binary/src/main.rs +++ b/rust/operator-binary/src/main.rs @@ -41,6 +41,7 @@ mod discovery; mod listener; mod operations; mod product_logging; +mod service; mod utils; mod zk_controller; mod znode_controller; diff --git a/rust/operator-binary/src/service.rs b/rust/operator-binary/src/service.rs new file mode 100644 index 00000000..0f7423b0 --- /dev/null +++ b/rust/operator-binary/src/service.rs @@ -0,0 +1,203 @@ +use std::{ + collections::{BTreeMap, HashMap}, + str::FromStr, +}; + +use product_config::types::PropertyNameKind; +use serde::de::IntoDeserializer; +use snafu::{ResultExt, Snafu}; +use stackable_operator::{ + builder::meta::ObjectMetaBuilder, + commons::product_image_selection::ResolvedProductImage, + k8s_openapi::api::core::v1::{Service, ServicePort, ServiceSpec}, + kvp::{Annotations, Labels}, + role_utils::RoleGroupRef, +}; + +use crate::{ + crd::{ + APP_NAME, JMX_METRICS_PORT, JMX_METRICS_PORT_NAME, METRICS_PROVIDER_HTTP_PORT, + METRICS_PROVIDER_HTTP_PORT_KEY, METRICS_PROVIDER_HTTP_PORT_NAME, ZOOKEEPER_ELECTION_PORT, + ZOOKEEPER_ELECTION_PORT_NAME, ZOOKEEPER_LEADER_PORT, ZOOKEEPER_LEADER_PORT_NAME, + ZOOKEEPER_PROPERTIES_FILE, v1alpha1, + }, + utils::build_recommended_labels, + zk_controller::ZK_CONTROLLER_NAME, +}; + +#[derive(Snafu, Debug)] +pub enum Error { + #[snafu(display("object is missing metadata to build owner reference"))] + ObjectMissingMetadataForOwnerRef { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("failed to build Metadata"))] + BuildMetadata { + source: stackable_operator::builder::meta::Error, + }, + + #[snafu(display("failed to build Labels"))] + BuildLabel { + source: stackable_operator::kvp::LabelError, + }, +} + +/// The rolegroup [`Service`] is a headless service that allows internal access to the instances of a certain rolegroup +/// +/// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing. +pub(crate) fn build_server_rolegroup_headless_service( + zk: &v1alpha1::ZookeeperCluster, + rolegroup: &RoleGroupRef, + resolved_product_image: &ResolvedProductImage, +) -> Result { + let metadata = ObjectMetaBuilder::new() + .name_and_namespace(zk) + .name(rolegroup.rolegroup_headless_service_name()) + .ownerreference_from_resource(zk, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(build_recommended_labels( + zk, + ZK_CONTROLLER_NAME, + &resolved_product_image.app_version_label_value, + &rolegroup.role, + &rolegroup.role_group, + )) + .context(BuildMetadataSnafu)? + .build(); + + let service_selector_labels = + Labels::role_group_selector(zk, APP_NAME, &rolegroup.role, &rolegroup.role_group) + .context(BuildLabelSnafu)?; + + let service_spec = ServiceSpec { + // Internal communication does not need to be exposed + type_: Some("ClusterIP".to_string()), + cluster_ip: Some("None".to_string()), + ports: Some(vec![ + ServicePort { + name: Some(ZOOKEEPER_LEADER_PORT_NAME.to_string()), + port: ZOOKEEPER_LEADER_PORT.into(), + protocol: Some("TCP".to_string()), + ..ServicePort::default() + }, + ServicePort { + name: Some(ZOOKEEPER_ELECTION_PORT_NAME.to_string()), + port: ZOOKEEPER_ELECTION_PORT.into(), + protocol: Some("TCP".to_string()), + ..ServicePort::default() + }, + ]), + selector: Some(service_selector_labels.into()), + publish_not_ready_addresses: Some(true), + ..ServiceSpec::default() + }; + + Ok(Service { + metadata, + spec: Some(service_spec), + status: None, + }) +} + +/// The rolegroup [`Service`] for exposing metrics +pub(crate) fn build_server_rolegroup_metrics_service( + zk: &v1alpha1::ZookeeperCluster, + rolegroup: &RoleGroupRef, + resolved_product_image: &ResolvedProductImage, + rolegroup_config: &HashMap>, +) -> Result { + let metrics_port = metrics_port_from_rolegroup_config(rolegroup_config); + + let metadata = ObjectMetaBuilder::new() + .name_and_namespace(zk) + .name(rolegroup.rolegroup_metrics_service_name()) + .ownerreference_from_resource(zk, None, Some(true)) + .context(ObjectMissingMetadataForOwnerRefSnafu)? + .with_recommended_labels(build_recommended_labels( + zk, + ZK_CONTROLLER_NAME, + &resolved_product_image.app_version_label_value, + &rolegroup.role, + &rolegroup.role_group, + )) + .context(BuildMetadataSnafu)? + .with_labels(prometheus_labels()) + .with_annotations(prometheus_annotations(metrics_port)) + .build(); + + let service_selector_labels = + Labels::role_group_selector(zk, APP_NAME, &rolegroup.role, &rolegroup.role_group) + .context(BuildLabelSnafu)?; + + let service_spec = ServiceSpec { + // Internal communication does not need to be exposed + type_: Some("ClusterIP".to_string()), + cluster_ip: Some("None".to_string()), + ports: Some(vec![ + // We keep this for legacy compatibility + ServicePort { + name: Some(JMX_METRICS_PORT_NAME.to_string()), + port: JMX_METRICS_PORT.into(), + protocol: Some("TCP".to_string()), + ..ServicePort::default() + }, + ServicePort { + name: Some(METRICS_PROVIDER_HTTP_PORT_NAME.to_string()), + port: metrics_port.into(), + protocol: Some("TCP".to_string()), + ..ServicePort::default() + }, + ]), + selector: Some(service_selector_labels.into()), + publish_not_ready_addresses: Some(true), + ..ServiceSpec::default() + }; + + Ok(Service { + metadata, + spec: Some(service_spec), + status: None, + }) +} + +pub(crate) fn metrics_port_from_rolegroup_config( + rolegroup_config: &HashMap>, +) -> u16 { + let metrics_port = rolegroup_config + .get(&PropertyNameKind::File( + ZOOKEEPER_PROPERTIES_FILE.to_string(), + )) + .expect("{ZOOKEEPER_PROPERTIES_FILE} is present") + .get(METRICS_PROVIDER_HTTP_PORT_KEY) + .expect("{METRICS_PROVIDER_HTTP_PORT_KEY} is set"); + + match u16::from_str(metrics_port) { + Ok(port) => port, + Err(err) => { + tracing::error!("{err}"); + tracing::info!("Defaulting to using {METRICS_PROVIDER_HTTP_PORT} as metrics port."); + METRICS_PROVIDER_HTTP_PORT + } + } +} + +/// Common labels for Prometheus +fn prometheus_labels() -> Labels { + Labels::try_from([("prometheus.io/scrape", "true")]).expect("should be a valid label") +} + +/// Common annotations for Prometheus +/// +/// These annotations can be used in a ServiceMonitor. +/// +/// see also +fn prometheus_annotations(metrics_port: u16) -> Annotations { + Annotations::try_from([ + ("prometheus.io/path".to_owned(), "/metrics".to_owned()), + ("prometheus.io/port".to_owned(), metrics_port.to_string()), + ("prometheus.io/scheme".to_owned(), "http".to_owned()), + ("prometheus.io/scrape".to_owned(), "true".to_owned()), + ]) + .expect("should be valid annotations") +} diff --git a/rust/operator-binary/src/zk_controller.rs b/rust/operator-binary/src/zk_controller.rs index f30ad2fc..caeb5b88 100644 --- a/rust/operator-binary/src/zk_controller.rs +++ b/rust/operator-binary/src/zk_controller.rs @@ -40,7 +40,7 @@ use stackable_operator::{ core::v1::{ ConfigMap, ConfigMapVolumeSource, EmptyDirVolumeSource, EnvVar, EnvVarSource, ExecAction, ObjectFieldSelector, PersistentVolumeClaim, PodSecurityContext, Probe, - Service, ServiceAccount, ServicePort, ServiceSpec, Volume, + ServiceAccount, Volume, }, }, apimachinery::pkg::apis::meta::v1::LabelSelector, @@ -51,7 +51,7 @@ use stackable_operator::{ core::{DeserializeGuard, error_boundary}, runtime::controller, }, - kvp::{Label, LabelError, Labels}, + kvp::{LabelError, Labels}, logging::controller::ReconcilerError, product_config_utils::{transform_all_roles_to_config, validate_all_roles_and_groups_config}, product_logging::{ @@ -79,14 +79,12 @@ use crate::{ command::create_init_container_command_args, config::jvm::{construct_non_heap_jvm_args, construct_zk_server_heap_env}, crd::{ - DOCKER_IMAGE_BASE_NAME, JMX_METRICS_PORT, JMX_METRICS_PORT_NAME, - JVM_SECURITY_PROPERTIES_FILE, MAX_PREPARE_LOG_FILE_SIZE, MAX_ZK_LOG_FILES_SIZE, - METRICS_PROVIDER_HTTP_PORT, METRICS_PROVIDER_HTTP_PORT_KEY, - METRICS_PROVIDER_HTTP_PORT_NAME, STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, - STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, STACKABLE_RW_CONFIG_DIR, - ZOOKEEPER_ELECTION_PORT, ZOOKEEPER_ELECTION_PORT_NAME, ZOOKEEPER_LEADER_PORT, - ZOOKEEPER_LEADER_PORT_NAME, ZOOKEEPER_PROPERTIES_FILE, ZOOKEEPER_SERVER_PORT_NAME, - ZookeeperRole, + DOCKER_IMAGE_BASE_NAME, JMX_METRICS_PORT_NAME, JVM_SECURITY_PROPERTIES_FILE, + MAX_PREPARE_LOG_FILE_SIZE, MAX_ZK_LOG_FILES_SIZE, METRICS_PROVIDER_HTTP_PORT_NAME, + STACKABLE_CONFIG_DIR, STACKABLE_DATA_DIR, STACKABLE_LOG_CONFIG_DIR, STACKABLE_LOG_DIR, + STACKABLE_RW_CONFIG_DIR, ZOOKEEPER_ELECTION_PORT, ZOOKEEPER_ELECTION_PORT_NAME, + ZOOKEEPER_LEADER_PORT, ZOOKEEPER_LEADER_PORT_NAME, ZOOKEEPER_PROPERTIES_FILE, + ZOOKEEPER_SERVER_PORT_NAME, ZookeeperRole, security::{self, ZookeeperSecurity}, v1alpha1::{self, ZookeeperServerRoleConfig}, }, @@ -94,6 +92,10 @@ use crate::{ listener::{build_role_listener, role_listener_name}, operations::{graceful_shutdown::add_graceful_shutdown_config, pdb::add_pdbs}, product_logging::extend_role_group_config_map, + service::{ + self, build_server_rolegroup_headless_service, build_server_rolegroup_metrics_service, + metrics_port_from_rolegroup_config, + }, utils::build_recommended_labels, }; @@ -242,7 +244,7 @@ pub enum Error { #[snafu(display("failed to build label"))] BuildLabel { source: LabelError }, - #[snafu(display("failed to build object meta data"))] + #[snafu(display("failed to build object meta data"))] ObjectMeta { source: stackable_operator::builder::meta::Error, }, @@ -288,6 +290,9 @@ pub enum Error { ResolveProductImage { source: product_image_selection::Error, }, + + #[snafu(display("failed to build service"))] + BuildService { source: service::Error }, } impl ReconcilerError for Error { @@ -336,6 +341,7 @@ impl ReconcilerError for Error { Error::BuildListenerPersistentVolume { .. } => None, Error::ListenerConfiguration { .. } => None, Error::ResolveProductImage { .. } => None, + Error::BuildService { .. } => None, } } } @@ -428,13 +434,15 @@ pub async fn reconcile_zk( .context(FailedToResolveConfigSnafu)?; let rg_headless_service = - build_server_rolegroup_headless_service(zk, &rolegroup, &resolved_product_image)?; + build_server_rolegroup_headless_service(zk, &rolegroup, &resolved_product_image) + .context(BuildServiceSnafu)?; let rg_metrics_service = build_server_rolegroup_metrics_service( zk, &rolegroup, &resolved_product_image, rolegroup_config, - )?; + ) + .context(BuildServiceSnafu)?; let rg_configmap = build_server_rolegroup_config_map( zk, &rolegroup, @@ -653,123 +661,6 @@ fn build_server_rolegroup_config_map( }) } -/// The rolegroup [`Service`] is a headless service that allows internal access to the instances of a certain rolegroup -/// -/// This is mostly useful for internal communication between peers, or for clients that perform client-side load balancing. -fn build_server_rolegroup_headless_service( - zk: &v1alpha1::ZookeeperCluster, - rolegroup: &RoleGroupRef, - resolved_product_image: &ResolvedProductImage, -) -> Result { - let metadata = ObjectMetaBuilder::new() - .name_and_namespace(zk) - .name(rolegroup.rolegroup_headless_service_name()) - .ownerreference_from_resource(zk, None, Some(true)) - .context(ObjectMissingMetadataForOwnerRefSnafu)? - .with_recommended_labels(build_recommended_labels( - zk, - ZK_CONTROLLER_NAME, - &resolved_product_image.app_version_label_value, - &rolegroup.role, - &rolegroup.role_group, - )) - .context(ObjectMetaSnafu)? - .build(); - - let service_selector_labels = - Labels::role_group_selector(zk, APP_NAME, &rolegroup.role, &rolegroup.role_group) - .context(BuildLabelSnafu)?; - - let service_spec = ServiceSpec { - // Internal communication does not need to be exposed - type_: Some("ClusterIP".to_string()), - cluster_ip: Some("None".to_string()), - ports: Some(vec![ - ServicePort { - name: Some(ZOOKEEPER_LEADER_PORT_NAME.to_string()), - port: ZOOKEEPER_LEADER_PORT as i32, - protocol: Some("TCP".to_string()), - ..ServicePort::default() - }, - ServicePort { - name: Some(ZOOKEEPER_ELECTION_PORT_NAME.to_string()), - port: ZOOKEEPER_ELECTION_PORT as i32, - protocol: Some("TCP".to_string()), - ..ServicePort::default() - }, - ]), - selector: Some(service_selector_labels.into()), - publish_not_ready_addresses: Some(true), - ..ServiceSpec::default() - }; - - Ok(Service { - metadata, - spec: Some(service_spec), - status: None, - }) -} - -/// The rolegroup [`Service`] for exposing metrics -fn build_server_rolegroup_metrics_service( - zk: &v1alpha1::ZookeeperCluster, - rolegroup: &RoleGroupRef, - resolved_product_image: &ResolvedProductImage, - rolegroup_config: &HashMap>, -) -> Result { - let prometheus_label = - Label::try_from(("prometheus.io/scrape", "true")).context(BuildLabelSnafu)?; - - let metadata = ObjectMetaBuilder::new() - .name_and_namespace(zk) - .name(rolegroup.rolegroup_metrics_service_name()) - .ownerreference_from_resource(zk, None, Some(true)) - .context(ObjectMissingMetadataForOwnerRefSnafu)? - .with_recommended_labels(build_recommended_labels( - zk, - ZK_CONTROLLER_NAME, - &resolved_product_image.app_version_label_value, - &rolegroup.role, - &rolegroup.role_group, - )) - .context(ObjectMetaSnafu)? - .with_label(prometheus_label) - .build(); - - let service_selector_labels = - Labels::role_group_selector(zk, APP_NAME, &rolegroup.role, &rolegroup.role_group) - .context(BuildLabelSnafu)?; - - let service_spec = ServiceSpec { - // Internal communication does not need to be exposed - type_: Some("ClusterIP".to_string()), - cluster_ip: Some("None".to_string()), - ports: Some(vec![ - ServicePort { - name: Some(JMX_METRICS_PORT_NAME.to_string()), - port: JMX_METRICS_PORT as i32, - protocol: Some("TCP".to_string()), - ..ServicePort::default() - }, - ServicePort { - name: Some(METRICS_PROVIDER_HTTP_PORT_NAME.to_string()), - port: metrics_port_from_rolegroup_config(rolegroup_config) as i32, - protocol: Some("TCP".to_string()), - ..ServicePort::default() - }, - ]), - selector: Some(service_selector_labels.into()), - publish_not_ready_addresses: Some(true), - ..ServiceSpec::default() - }; - - Ok(Service { - metadata, - spec: Some(service_spec), - status: None, - }) -} - pub fn build_role_listener_pvc( group_listener_name: &str, unversioned_recommended_labels: &Labels, @@ -1151,27 +1042,6 @@ fn build_server_rolegroup_statefulset( }) } -fn metrics_port_from_rolegroup_config( - rolegroup_config: &HashMap>, -) -> u16 { - let metrics_port = rolegroup_config - .get(&PropertyNameKind::File( - ZOOKEEPER_PROPERTIES_FILE.to_string(), - )) - .expect("{ZOOKEEPER_PROPERTIES_FILE} is present") - .get(METRICS_PROVIDER_HTTP_PORT_KEY) - .expect("{METRICS_PROVIDER_HTTP_PORT_KEY} is set"); - - match u16::from_str(metrics_port) { - Ok(port) => port, - Err(err) => { - tracing::error!("{err}"); - tracing::info!("Defaulting to using {METRICS_PROVIDER_HTTP_PORT} as metrics port."); - METRICS_PROVIDER_HTTP_PORT - } - } -} - pub fn error_policy( _obj: Arc>, error: &Error, From 3467008d3548bd9d4c22087c2173d9c2d65553b2 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 20 Oct 2025 09:43:22 +0200 Subject: [PATCH 3/6] adapted changelog --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 72a09b31..3de65f2a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -12,6 +12,7 @@ All notable changes to this project will be documented in this file. - `EOS_DISABLED` (`--eos-disabled`) to disable the EoS checker completely. - Helm: Allow Pod `priorityClassName` to be configured ([#974]). - Add support for `3.9.4` ([#977]). +- Add `prometheus.io/path|port|scheme` annotations to metrics service (for native metrics) ([#978]). ### Changed @@ -20,6 +21,7 @@ All notable changes to this project will be documented in this file. [#974]: https://github.com/stackabletech/zookeeper-operator/pull/974 [#976]: https://github.com/stackabletech/zookeeper-operator/pull/976 [#977]: https://github.com/stackabletech/zookeeper-operator/pull/977 +[#978]: https://github.com/stackabletech/zookeeper-operator/pull/978 ## [25.7.0] - 2025-07-23 From 7086c959bbaa061e7dc7c68da53b6a13c11234c9 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 20 Oct 2025 09:45:41 +0200 Subject: [PATCH 4/6] clippy --- rust/operator-binary/src/service.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/rust/operator-binary/src/service.rs b/rust/operator-binary/src/service.rs index 0f7423b0..e7052b0f 100644 --- a/rust/operator-binary/src/service.rs +++ b/rust/operator-binary/src/service.rs @@ -4,7 +4,6 @@ use std::{ }; use product_config::types::PropertyNameKind; -use serde::de::IntoDeserializer; use snafu::{ResultExt, Snafu}; use stackable_operator::{ builder::meta::ObjectMetaBuilder, From 45ea882124de816310de99a4490298b9a4e3afb2 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Mon, 20 Oct 2025 09:51:07 +0200 Subject: [PATCH 5/6] fix cargo doc --- rust/operator-binary/src/zk_controller.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/rust/operator-binary/src/zk_controller.rs b/rust/operator-binary/src/zk_controller.rs index caeb5b88..5c0432c1 100644 --- a/rust/operator-binary/src/zk_controller.rs +++ b/rust/operator-binary/src/zk_controller.rs @@ -675,7 +675,7 @@ pub fn build_role_listener_pvc( /// The rolegroup [`StatefulSet`] runs the rolegroup, as configured by the administrator. /// -/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the corresponding headless [`Service`] (from [`build_server_rolegroup_headless_service`]). +/// The [`Pod`](`stackable_operator::k8s_openapi::api::core::v1::Pod`)s are accessible through the corresponding headless [`stackable_operator::k8s_openapi::api::core::v1::Service`] (from [`build_server_rolegroup_headless_service`]). #[allow(clippy::too_many_arguments)] fn build_server_rolegroup_statefulset( zk: &v1alpha1::ZookeeperCluster, From 69c1d8a6f649358de121d55f4c98642556384af5 Mon Sep 17 00:00:00 2001 From: Malte Sander Date: Tue, 21 Oct 2025 10:16:54 +0200 Subject: [PATCH 6/6] replace expects with errors --- rust/operator-binary/src/service.rs | 22 +++++++++++++++------- rust/operator-binary/src/zk_controller.rs | 8 +++++++- 2 files changed, 22 insertions(+), 8 deletions(-) diff --git a/rust/operator-binary/src/service.rs b/rust/operator-binary/src/service.rs index e7052b0f..1e7e4ecf 100644 --- a/rust/operator-binary/src/service.rs +++ b/rust/operator-binary/src/service.rs @@ -4,7 +4,7 @@ use std::{ }; use product_config::types::PropertyNameKind; -use snafu::{ResultExt, Snafu}; +use snafu::{OptionExt, ResultExt, Snafu}; use stackable_operator::{ builder::meta::ObjectMetaBuilder, commons::product_image_selection::ResolvedProductImage, @@ -40,6 +40,12 @@ pub enum Error { BuildLabel { source: stackable_operator::kvp::LabelError, }, + + #[snafu(display("missing zookeeper properties file {ZOOKEEPER_PROPERTIES_FILE} in config"))] + MissingPropertiesFile, + + #[snafu(display("missing provider http port key {METRICS_PROVIDER_HTTP_PORT_KEY} in config"))] + MissingProviderHttpPortKey, } /// The rolegroup [`Service`] is a headless service that allows internal access to the instances of a certain rolegroup @@ -106,7 +112,7 @@ pub(crate) fn build_server_rolegroup_metrics_service( resolved_product_image: &ResolvedProductImage, rolegroup_config: &HashMap>, ) -> Result { - let metrics_port = metrics_port_from_rolegroup_config(rolegroup_config); + let metrics_port = metrics_port_from_rolegroup_config(rolegroup_config)?; let metadata = ObjectMetaBuilder::new() .name_and_namespace(zk) @@ -162,23 +168,25 @@ pub(crate) fn build_server_rolegroup_metrics_service( pub(crate) fn metrics_port_from_rolegroup_config( rolegroup_config: &HashMap>, -) -> u16 { +) -> Result { let metrics_port = rolegroup_config .get(&PropertyNameKind::File( ZOOKEEPER_PROPERTIES_FILE.to_string(), )) - .expect("{ZOOKEEPER_PROPERTIES_FILE} is present") + .context(MissingPropertiesFileSnafu)? .get(METRICS_PROVIDER_HTTP_PORT_KEY) - .expect("{METRICS_PROVIDER_HTTP_PORT_KEY} is set"); + .context(MissingProviderHttpPortKeySnafu)?; - match u16::from_str(metrics_port) { + let port = match u16::from_str(metrics_port) { Ok(port) => port, Err(err) => { tracing::error!("{err}"); tracing::info!("Defaulting to using {METRICS_PROVIDER_HTTP_PORT} as metrics port."); METRICS_PROVIDER_HTTP_PORT } - } + }; + + Ok(port) } /// Common labels for Prometheus diff --git a/rust/operator-binary/src/zk_controller.rs b/rust/operator-binary/src/zk_controller.rs index 5c0432c1..26973246 100644 --- a/rust/operator-binary/src/zk_controller.rs +++ b/rust/operator-binary/src/zk_controller.rs @@ -293,6 +293,9 @@ pub enum Error { #[snafu(display("failed to build service"))] BuildService { source: service::Error }, + + #[snafu(display("failed to retrieve metrics port from config"))] + RetrieveMetricsPortFromConfig { source: service::Error }, } impl ReconcilerError for Error { @@ -342,6 +345,7 @@ impl ReconcilerError for Error { Error::ListenerConfiguration { .. } => None, Error::ResolveProductImage { .. } => None, Error::BuildService { .. } => None, + Error::RetrieveMetricsPortFromConfig { .. } => None, } } } @@ -875,7 +879,9 @@ fn build_server_rolegroup_statefulset( .add_container_port(JMX_METRICS_PORT_NAME, 9505) .add_container_port( METRICS_PROVIDER_HTTP_PORT_NAME, - metrics_port_from_rolegroup_config(server_config).into(), + metrics_port_from_rolegroup_config(server_config) + .context(RetrieveMetricsPortFromConfigSnafu)? + .into(), ) .add_volume_mount("data", STACKABLE_DATA_DIR) .context(AddVolumeMountSnafu)?