Skip to content

Commit 83ec87b

Browse files
committed
Refactor CatalogContext to use interface segregation pattern
- Introduce ICatalogContext interface for Hadoop-free environments - Add HadoopAware interface to isolate Hadoop dependency - CatalogContext implements ICatalogContext - CatalogHadoopContext implements HadoopAware - Update FileIO and related classes to use ICatalogContext - Remove problematic hadoopContext() method from CatalogContext - Simplify CatalogContext factory methods to reduce code duplication
1 parent b7a42cd commit 83ec87b

File tree

41 files changed

+669
-129
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

41 files changed

+669
-129
lines changed

paimon-common/src/main/java/org/apache/paimon/catalog/CatalogContext.java

Lines changed: 90 additions & 35 deletions
Original file line numberDiff line numberDiff line change
@@ -21,87 +21,142 @@
2121
import org.apache.paimon.annotation.Public;
2222
import org.apache.paimon.fs.FileIOLoader;
2323
import org.apache.paimon.fs.Path;
24-
import org.apache.paimon.hadoop.SerializableConfiguration;
2524
import org.apache.paimon.options.Options;
2625

27-
import org.apache.hadoop.conf.Configuration;
28-
2926
import javax.annotation.Nullable;
3027

31-
import java.io.Serializable;
32-
3328
import static org.apache.paimon.options.CatalogOptions.WAREHOUSE;
34-
import static org.apache.paimon.utils.HadoopUtils.getHadoopConfiguration;
29+
import static org.apache.paimon.utils.HadoopUtils.HADOOP_LOAD_DEFAULT_CONFIG;
3530
import static org.apache.paimon.utils.Preconditions.checkNotNull;
3631

3732
/**
38-
* Context of catalog.
33+
* Context of catalog for Hadoop-free environments.
34+
*
35+
* <p>This class provides basic catalog context without Hadoop dependencies. The factory methods
36+
* ({@link #create(Options)}, etc.) automatically detect whether Hadoop Configuration is needed and
37+
* return the appropriate type ({@link CatalogContext} or {@link CatalogHadoopContext}).
38+
*
39+
* <h3>When CatalogContext is Used</h3>
40+
*
41+
* <p>The factory will create a basic {@code CatalogContext} when:
42+
*
43+
* <ul>
44+
* <li>Working with local filesystem or cloud storage (S3, Azure, GCS) without Hadoop
45+
* <li>No Hadoop-based features are required
46+
* <li>Running in a non-Hadoop environment
47+
* </ul>
48+
*
49+
* <h3>When CatalogHadoopContext is Used</h3>
50+
*
51+
* <p>The factory automatically creates a {@link CatalogHadoopContext} when it detects:
52+
*
53+
* <ul>
54+
* <li><b>Hive metastore</b>: {@code metastore=hive} option is set
55+
* <li><b>HDFS filesystem</b>: Warehouse path starts with {@code hdfs://}, {@code viewfs://}, or
56+
* {@code har://}
57+
* <li><b>Kerberos security</b>: Any Kerberos-related options are configured
58+
* <li><b>Hadoop environment</b>: NOTE — {@code HADOOP_CONF_DIR} / {@code HADOOP_HOME}
59+
* environment variables are deliberately <i>not</i> consulted by the factory (they can
* give false positives in dev/test environments); use the explicit option below instead
60+
* <li><b>Explicit option</b>: {@code hadoop-load-default-config=true} is set
61+
* </ul>
62+
*
63+
* <h3>Direct Type Selection</h3>
64+
*
65+
* <p>For explicit control, call {@link CatalogHadoopContext#create(Options)} directly instead of
66+
* using this factory.
3967
*
4068
* @since 0.4.0
69+
* @see CatalogHadoopContext
70+
* @see HadoopAware
4171
*/
4272
@Public
43-
public class CatalogContext implements Serializable {
73+
public class CatalogContext implements ICatalogContext {
4474

4575
private static final long serialVersionUID = 1L;
4676

47-
private final Options options;
48-
private final SerializableConfiguration hadoopConf;
49-
@Nullable private final FileIOLoader preferIOLoader;
50-
@Nullable private final FileIOLoader fallbackIOLoader;
77+
protected final Options options;
78+
@Nullable protected final FileIOLoader preferIOLoader;
79+
@Nullable protected final FileIOLoader fallbackIOLoader;
5180

52-
private CatalogContext(
81+
protected CatalogContext(
5382
Options options,
54-
@Nullable Configuration hadoopConf,
5583
@Nullable FileIOLoader preferIOLoader,
5684
@Nullable FileIOLoader fallbackIOLoader) {
5785
this.options = checkNotNull(options);
58-
this.hadoopConf =
59-
new SerializableConfiguration(
60-
hadoopConf == null ? getHadoopConfiguration(options) : hadoopConf);
6186
this.preferIOLoader = preferIOLoader;
6287
this.fallbackIOLoader = fallbackIOLoader;
6388
}
6489

90+
/**
 * Returns a new context carrying the given options while keeping this context's prefer and
 * fallback {@code FileIOLoader}s.
 *
 * <p>Delegates to the factory, so the result may be a {@code CatalogHadoopContext} if the new
 * options require Hadoop.
 */
public CatalogContext copy(Options options) {
    return create(options, this.preferIOLoader, this.fallbackIOLoader);
}
93+
6594
public static CatalogContext create(Path warehouse) {
6695
Options options = new Options();
6796
options.set(WAREHOUSE, warehouse.toUri().toString());
6897
return create(options);
6998
}
7099

71100
public static CatalogContext create(Options options) {
72-
return new CatalogContext(options, null, null, null);
73-
}
74-
75-
public static CatalogContext create(Options options, Configuration hadoopConf) {
76-
return new CatalogContext(options, hadoopConf, null, null);
101+
return create(options, null, null);
77102
}
78103

79104
public static CatalogContext create(Options options, FileIOLoader fallbackIOLoader) {
80-
return new CatalogContext(options, null, null, fallbackIOLoader);
105+
return create(options, null, fallbackIOLoader);
81106
}
82107

83108
public static CatalogContext create(
84109
Options options, FileIOLoader preferIOLoader, FileIOLoader fallbackIOLoader) {
85-
return new CatalogContext(options, null, preferIOLoader, fallbackIOLoader);
110+
return shouldUseHadoopContext(options)
111+
? CatalogHadoopContext.create(options, preferIOLoader, fallbackIOLoader)
112+
: new CatalogContext(options, preferIOLoader, fallbackIOLoader);
86113
}
87114

88-
public static CatalogContext create(
89-
Options options,
90-
Configuration hadoopConf,
91-
FileIOLoader preferIOLoader,
92-
FileIOLoader fallbackIOLoader) {
93-
return new CatalogContext(options, hadoopConf, preferIOLoader, fallbackIOLoader);
115+
/**
116+
* Determines whether to use {@link CatalogHadoopContext} based on multiple detection criteria.
117+
*
118+
* <p>This method intelligently detects whether Hadoop Configuration is needed by checking (in
119+
* order of priority):
120+
*
121+
* <ol>
122+
* <li><b>Metastore type</b>: Hive metastore requires Hadoop Configuration
123+
* <li><b>Filesystem type</b>: HDFS and Hadoop-compatible filesystems require Hadoop
124+
* Configuration
125+
* <li><b>Security configuration</b>: Kerberos authentication requires Hadoop Configuration
126+
* <li><b>Explicit option</b>: The {@code hadoop-load-default-config} option
127+
* </ol>
128+
*
129+
* <p>Note: Environment variable detection (HADOOP_CONF_DIR/HADOOP_HOME) is not used as it may
130+
* give false positives in development/testing environments where Hadoop is present but not
131+
* required for the current catalog.
132+
*
133+
* @param options catalog options
134+
* @return true if {@link CatalogHadoopContext} should be used, false otherwise
135+
*/
136+
private static boolean shouldUseHadoopContext(Options options) {
137+
// Check metastore type (Hive requires Hadoop)
138+
if (CatalogHadoopContext.needsHadoopForMetastore(options)) {
139+
return true;
140+
}
141+
142+
// Check filesystem type (HDFS and similar require Hadoop)
143+
if (CatalogHadoopContext.needsHadoopForFilesystem(options)) {
144+
return true;
145+
}
146+
147+
// Check security configuration (Kerberos requires Hadoop)
148+
if (CatalogHadoopContext.needsHadoopForSecurity(options)) {
149+
return true;
150+
}
151+
152+
// Fall back to explicit option
153+
return options.getBoolean(HADOOP_LOAD_DEFAULT_CONFIG.key(), false);
94154
}
95155

96156
public Options options() {
97157
return options;
98158
}
99159

100-
/** Return hadoop {@link Configuration}. */
101-
public Configuration hadoopConf() {
102-
return hadoopConf.get();
103-
}
104-
105160
@Nullable
106161
public FileIOLoader preferIO() {
107162
return preferIOLoader;
Lines changed: 169 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,169 @@
1+
/*
2+
* Licensed to the Apache Software Foundation (ASF) under one
3+
* or more contributor license agreements. See the NOTICE file
4+
* distributed with this work for additional information
5+
* regarding copyright ownership. The ASF licenses this file
6+
* to you under the Apache License, Version 2.0 (the
7+
* "License"); you may not use this file except in compliance
8+
* with the License. You may obtain a copy of the License at
9+
*
10+
* http://www.apache.org/licenses/LICENSE-2.0
11+
*
12+
* Unless required by applicable law or agreed to in writing, software
13+
* distributed under the License is distributed on an "AS IS" BASIS,
14+
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15+
* See the License for the specific language governing permissions and
16+
* limitations under the License.
17+
*/
18+
19+
package org.apache.paimon.catalog;
20+
21+
import org.apache.paimon.annotation.Public;
22+
import org.apache.paimon.fs.FileIOLoader;
23+
import org.apache.paimon.fs.Path;
24+
import org.apache.paimon.hadoop.SerializableConfiguration;
25+
import org.apache.paimon.options.Options;
26+
27+
import org.apache.hadoop.conf.Configuration;
28+
29+
import javax.annotation.Nullable;
30+
31+
import static org.apache.paimon.options.CatalogOptions.METASTORE;
32+
import static org.apache.paimon.options.CatalogOptions.WAREHOUSE;
33+
import static org.apache.paimon.utils.HadoopUtils.getHadoopConfiguration;
34+
35+
/**
36+
* Context of catalog with Hadoop configuration support.
37+
*
38+
* <p>This class extends {@link CatalogContext} and implements {@link HadoopAware} to provide access
39+
* to Hadoop configuration. Use this class when Hadoop integration is required.
40+
*
41+
* @since 0.4.0
42+
*/
43+
@Public
44+
public class CatalogHadoopContext extends CatalogContext implements HadoopAware {
45+
46+
private static final long serialVersionUID = 1L;
47+
48+
private final SerializableConfiguration hadoopConf;
49+
50+
private CatalogHadoopContext(
51+
Options options,
52+
@Nullable Configuration hadoopConf,
53+
@Nullable FileIOLoader preferIOLoader,
54+
@Nullable FileIOLoader fallbackIOLoader) {
55+
super(options, preferIOLoader, fallbackIOLoader);
56+
this.hadoopConf =
57+
new SerializableConfiguration(
58+
hadoopConf == null ? getHadoopConfiguration(options) : hadoopConf);
59+
}
60+
61+
@Override
62+
public CatalogContext copy(Options options) {
63+
return CatalogHadoopContext.create(
64+
options, this.hadoopConf.get(), this.preferIOLoader, this.fallbackIOLoader);
65+
}
66+
67+
public static CatalogHadoopContext create(Path warehouse) {
68+
Options options = new Options();
69+
options.set(WAREHOUSE, warehouse.toUri().toString());
70+
return create(options);
71+
}
72+
73+
public static CatalogHadoopContext create(Options options) {
74+
return new CatalogHadoopContext(options, null, null, null);
75+
}
76+
77+
public static CatalogHadoopContext create(Options options, Configuration hadoopConf) {
78+
return new CatalogHadoopContext(options, hadoopConf, null, null);
79+
}
80+
81+
public static CatalogHadoopContext create(Options options, FileIOLoader fallbackIOLoader) {
82+
return new CatalogHadoopContext(options, null, null, fallbackIOLoader);
83+
}
84+
85+
public static CatalogHadoopContext create(
86+
Options options, FileIOLoader preferIOLoader, FileIOLoader fallbackIOLoader) {
87+
return new CatalogHadoopContext(options, null, preferIOLoader, fallbackIOLoader);
88+
}
89+
90+
public static CatalogHadoopContext create(
91+
Options options,
92+
Configuration hadoopConf,
93+
FileIOLoader preferIOLoader,
94+
FileIOLoader fallbackIOLoader) {
95+
return new CatalogHadoopContext(options, hadoopConf, preferIOLoader, fallbackIOLoader);
96+
}
97+
98+
/** Return hadoop {@link Configuration}. */
99+
public Configuration hadoopConf() {
100+
return hadoopConf.get();
101+
}
102+
103+
// ----------------------- Detection Utility Methods -----------------------
104+
105+
/**
106+
* Checks if Hadoop context is needed based on metastore configuration.
107+
*
108+
* <p>Certain metastores (like Hive) require Hadoop Configuration for HMS client creation and
109+
* table operations.
110+
*
111+
* @param options catalog options
112+
* @return true if Hadoop context is required for the configured metastore
113+
*/
114+
static boolean needsHadoopForMetastore(Options options) {
115+
if (!options.contains(METASTORE)) {
116+
return false;
117+
}
118+
String metastore = options.get(METASTORE);
119+
// Hive metastore requires Hadoop configuration
120+
return "hive".equalsIgnoreCase(metastore);
121+
}
122+
123+
/**
124+
* Checks if Hadoop context is needed based on warehouse filesystem.
125+
*
126+
* <p>HDFS and certain Hadoop-compatible filesystems require Hadoop Configuration for file
127+
* operations.
128+
*
129+
* @param options catalog options
130+
* @return true if Hadoop context is required for the warehouse filesystem
131+
*/
132+
static boolean needsHadoopForFilesystem(Options options) {
133+
if (!options.contains(WAREHOUSE)) {
134+
return false;
135+
}
136+
String warehouse = options.get(WAREHOUSE);
137+
// HDFS and some Hadoop filesystems need Hadoop configuration
138+
return warehouse.startsWith("hdfs://")
139+
|| warehouse.startsWith("viewfs://")
140+
|| warehouse.startsWith("har://");
141+
}
142+
143+
/**
144+
* Checks if Hadoop context is needed based on security configuration.
145+
*
146+
* <p>Kerberos authentication and other Hadoop security features require Hadoop Configuration.
147+
*
148+
* @param options catalog options
149+
* @return true if Hadoop context is required for security features
150+
*/
151+
static boolean needsHadoopForSecurity(Options options) {
152+
// Check for Kerberos configuration
153+
return options.containsKey("security.kerberos.login.principal")
154+
|| options.containsKey("security.kerberos.login.keytab")
155+
|| options.containsKey("security.kerberos.login.use-ticket-cache");
156+
}
157+
158+
/**
159+
* Checks if running in a Hadoop environment by detecting environment variables.
160+
*
161+
* <p>Presence of HADOOP_CONF_DIR or HADOOP_HOME suggests a Hadoop environment where Hadoop
162+
* Configuration should be available.
163+
*
164+
* @return true if Hadoop environment is detected
165+
*/
166+
static boolean inHadoopEnvironment() {
167+
return System.getenv("HADOOP_CONF_DIR") != null || System.getenv("HADOOP_HOME") != null;
168+
}
169+
}
Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements. See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership. The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License. You may obtain a copy of the License at
 *
 *     http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.paimon.catalog;

import org.apache.paimon.annotation.Public;

import org.apache.hadoop.conf.Configuration;

/**
 * Interface for components that require Hadoop configuration.
 *
 * <p>This interface provides access to Hadoop {@link Configuration} for components that need to
 * interact with Hadoop filesystem or other Hadoop-based services.
 *
 * <p>Implementing this interface indicates that the component has a dependency on Hadoop classes.
 * Components that do not implement this interface can operate in Hadoop-free environments.
 * Callers should therefore test with {@code instanceof HadoopAware} before requesting the
 * configuration from a context obtained via a factory.
 *
 * @since 0.10.0
 */
@Public
public interface HadoopAware {

    /**
     * Returns the Hadoop configuration.
     *
     * @return Hadoop configuration instance
     */
    Configuration hadoopConf();
}

0 commit comments

Comments
 (0)