Skip to content

Adds functionality to specify custom catalog definitions for Trino. #161

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions deploy/crd/trinocluster.crd.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -199,6 +199,12 @@ spec:
required:
- roleGroups
type: object
customCatalogs:
description: An optional list of references to ConfigMaps that contain catalog definitions which should be deployed into the Trino instance
items:
type: string
nullable: true
type: array
hiveConfigMapName:
nullable: true
type: string
Expand Down
6 changes: 6 additions & 0 deletions deploy/helm/trino-operator/crds/crds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -201,6 +201,12 @@ spec:
required:
- roleGroups
type: object
customCatalogs:
description: An optional list of references to ConfigMaps that contain catalog definitions which should be deployed into the Trino instance
items:
type: string
nullable: true
type: array
hiveConfigMapName:
nullable: true
type: string
Expand Down
6 changes: 6 additions & 0 deletions deploy/manifests/crds.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -203,6 +203,12 @@ spec:
required:
- roleGroups
type: object
customCatalogs:
description: An optional list of references to ConfigMaps that contain catalog definitions which should be deployed into the Trino instance
items:
type: string
nullable: true
type: array
hiveConfigMapName:
nullable: true
type: string
Expand Down
32 changes: 32 additions & 0 deletions docs/modules/ROOT/pages/usage.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,38 @@ To access the CLI please execute:

If you use self-signed certificates, you also need the `--insecure` flag shown above; otherwise it can be omitted.

=== Defining Catalogs
If you specify a Hive connection in your Trino definition, the 'hive' catalog will automatically be created for you.

To specify additional catalogs, you can add these via the `customCatalogs` option in the TrinoCluster CRD.
Every entry in this list is treated as the name of a ConfigMap, and all these ConfigMaps will be merged into the catalog definitions which Trino is then configured with.

For example, to define a Hive catalog and an additional PostgreSQL catalog, you would define the following objects:

[source,yaml]
----
apiVersion: trino.stackable.tech/v1alpha1
kind: TrinoCluster
metadata:
  name: simple-trino
spec:
  hiveConfigMapName: simple-hive-derby
  customCatalogs:
    - trino-postgres
  ...
---
apiVersion: v1
kind: ConfigMap
metadata:
  name: trino-postgres
data:
  postgres.properties: |
    <insert catalog properties>
----

WARNING: To avoid hidden failures, the operator fails early if a referenced ConfigMap is missing or if the same key is defined more than once across the referenced ConfigMaps!


=== Test Trino with Hive and S3

Create a schema and a table for the Iris data located in S3 and query the data. This assumes that the Iris data set is available in the `PARQUET` format in the S3 bucket. The data set can be downloaded https://www.kaggle.com/gpreda/iris-dataset/version/2?select=iris.parquet[here].
Expand Down
3 changes: 3 additions & 0 deletions rust/crd/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,9 @@ pub struct TrinoClusterSpec {
pub hive_config_map_name: Option<String>,
#[serde(default, skip_serializing_if = "Option::is_none")]
pub opa_config_map_name: Option<String>,
/// An optional list of references to ConfigMaps that contain catalog definitions
/// which should be deployed into the Trino instance
pub custom_catalogs: Option<Vec<String>>,
/// A reference to a secret containing username/password for defined users
#[serde(default, skip_serializing_if = "Option::is_none")]
pub authentication: Option<Authentication>,
Expand Down
52 changes: 50 additions & 2 deletions rust/operator-binary/src/controller.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
//! Ensures that `Pod`s are configured and running for each [`TrinoCluster`]
use snafu::{OptionExt, ResultExt, Snafu};
use stackable_operator::kube::Resource;
use stackable_operator::{
builder::{ConfigMapBuilder, ContainerBuilder, ObjectMetaBuilder, PodBuilder},
client::Client,
Expand Down Expand Up @@ -121,6 +122,15 @@ pub enum Error {
InternalOperatorFailure { source: stackable_trino_crd::Error },
#[snafu(display("no coordinator pods found for discovery"))]
MissingCoordinatorPods,
#[snafu(display("Failed retrieving a ConfigMap that was referenced from the cluster definition : [{config_map_name}]"))]
ConfigMapReference {
source: stackable_operator::error::Error,
config_map_name: String,
},
#[snafu(display(
"Duplicate catalog definitions found during reconciliation: [{duplicate_catalogs:?}]"
))]
DuplicateCatalogs { duplicate_catalogs: Vec<String> },
}

type Result<T, E = Error> = std::result::Result<T, E>;
Expand Down Expand Up @@ -168,7 +178,7 @@ pub async fn reconcile_trino(
let rg_configmap =
build_rolegroup_config_map(&trino, &trino_role, &rolegroup, &config)?;
let rg_catalog_configmap =
build_rolegroup_catalog_config_map(&trino, &rolegroup, &config)?;
build_rolegroup_catalog_config_map(&trino, &rolegroup, &config, client).await?;
let rg_stateful_set = build_rolegroup_statefulset(
&trino,
&trino_role,
Expand Down Expand Up @@ -370,12 +380,14 @@ fn build_rolegroup_config_map(

/// The rolegroup catalog [`ConfigMap`] configures the rolegroup catalog based on the configuration
/// given by the administrator
fn build_rolegroup_catalog_config_map(
async fn build_rolegroup_catalog_config_map(
trino: &TrinoCluster,
rolegroup_ref: &RoleGroupRef<TrinoCluster>,
config: &HashMap<PropertyNameKind, BTreeMap<String, String>>,
client: &Client,
) -> Result<ConfigMap> {
let mut cm_hive_data = BTreeMap::new();
let mut catalog_conflicts: Vec<String> = Vec::new();

for (property_name_kind, config) in config {
let mut transformed_config: BTreeMap<String, Option<String>> = config
Expand Down Expand Up @@ -403,6 +415,42 @@ fn build_rolegroup_catalog_config_map(
}
}

let ns = trino
.meta()
.namespace
.as_ref()
.with_context(|| ObjectHasNoNamespaceSnafu {})?;

// Add extra catalogs that have been defined
if let Some(catalog_list) = &trino.spec.custom_catalogs {
for config_map_name in catalog_list {
let config_map = client
.get::<ConfigMap>(config_map_name, Some(ns))
.await
.with_context(|_| ConfigMapReferenceSnafu { config_map_name })?;

if let Some(data) = config_map.data {
for (key, value) in data {
// Check if there is already a key of this name, if so, add it to the list
// of conflicts, otherwise add to config
if cm_hive_data.contains_key(&key) {
catalog_conflicts.push(key);
} else {
cm_hive_data.insert(key.clone(), value.clone());
}
}
}
}
}
if !catalog_conflicts.is_empty() {
// There were duplicate entries, which means catalogs of the same name
// Instead of silently overwriting this, we'll fail loudly here.
return DuplicateCatalogsSnafu {
duplicate_catalogs: catalog_conflicts,
}
.fail();
}

ConfigMapBuilder::new()
.metadata(
ObjectMetaBuilder::new()
Expand Down