Skip to content

Commit

Permalink
NIFI-8519 Adding HDFS support for NAR autoload
Browse files Browse the repository at this point in the history
- Refining classloader management with the help of @markap14

This closes apache#5059

Signed-off-by: Mark Payne <markap14@hotmail.com>
  • Loading branch information
simonbence authored and markap14 committed May 14, 2021
1 parent 7c08fbc commit 51aae5b
Show file tree
Hide file tree
Showing 22 changed files with 1,193 additions and 111 deletions.
44 changes: 44 additions & 0 deletions nifi-api/src/main/java/org/apache/nifi/nar/NarProvider.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.nar;

import java.io.IOException;
import java.io.InputStream;
import java.util.Collection;

/**
* Represents an external source where the NAR files might be acquired from. Used by the NAR auto loader functionality
* in order to poll an external source for new NAR files to load.
*/
public interface NarProvider {
/**
* Initializes the NAR Provider based on the given set of properties.
*
* @param context The initialization context. It exposes the properties through
* {@link NarProviderInitializationContext#getProperties()}.
*/
void initialize(NarProviderInitializationContext context);

/**
* Performs a listing of all NARs that are available.
*
* @return A collection of locations, where the format depends on the actual implementation.
*
* @throws IOException May be thrown by the implementation; presumably when the external source cannot be listed.
*/
Collection<String> listNars() throws IOException;

/**
* Fetches the NAR at the given location. The location should be one of the values returned by <code>listNars()</code>.
*
* @param location The location of the NAR to fetch, in the format used by <code>listNars()</code>.
*
* @return An input stream for the NAR at the given location.
*
* @throws IOException May be thrown by the implementation; presumably when the NAR cannot be read from the external source.
*/
InputStream fetchNarContents(String location) throws IOException;
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,30 @@
/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.nifi.nar;

import java.util.Map;

/**
* Contains necessary information for extensions of NAR auto loader functionality.
*/
public interface NarProviderInitializationContext {

/**
* Returns the properties made available to the NAR provider.
*
* @return The available properties as a map of property name to property value.
*/
Map<String, String> getProperties();
}
Original file line number Diff line number Diff line change
Expand Up @@ -1914,6 +1914,39 @@ public Path getQuestDbStatusRepositoryPath() {
return Paths.get(getProperty(STATUS_REPOSITORY_QUESTDB_PERSIST_LOCATION, DEFAULT_COMPONENT_STATUS_REPOSITORY_PERSIST_LOCATION));
}

/**
 * Collects every property whose key begins with the given prefix.
 *
 * @param prefix The literal leading string to match against the property keys. No dot is appended or treated
 *               specially, thus the prefix "item" matches keys starting with "item." as well as keys starting
 *               with "items". Properties with an empty value are included in the result.
 *
 * @return A map from each matching property key to its value.
 */
public Map<String, String> getPropertiesWithPrefix(final String prefix) {
    return getPropertyKeys()
            .stream()
            .filter(propertyName -> propertyName.startsWith(prefix))
            .collect(Collectors.toMap(
                    propertyName -> propertyName,
                    propertyName -> getProperty(propertyName)));
}

/**
 * Returns all the possible next "tokens" following the given prefix. A token is the part of a property key
 * between two dots (or between the last dot and the end of the key).
 *
 * For example, if the properties "parent.sub1" and "parent.sub2" are set and the prefix is "parent", the method
 * returns a set consisting of "sub1" and "sub2". Only the token directly after the prefix is considered, so the
 * property "parent.sub1.subsub1" contributes "sub1" to the result.
 *
 * @param prefix The prefix of the request. A trailing dot is optional: "parent" and "parent." are equivalent.
 *
 * @return A set of the tokens directly following the prefix.
 */
public Set<String> getDirectSubsequentTokens(final String prefix) {
    // Normalize the prefix so that it always ends with the separator dot.
    final String dottedPrefix;
    if (prefix.endsWith(".")) {
        dottedPrefix = prefix;
    } else {
        dottedPrefix = prefix + ".";
    }
    final int tokenStart = dottedPrefix.length();

    return getPropertyKeys().stream()
            .filter(propertyName -> propertyName.startsWith(dottedPrefix))
            .map(propertyName -> {
                // The token ends at the first dot after the prefix, or at the end of the key if there is none.
                final int tokenEnd = propertyName.indexOf('.', tokenStart);
                return tokenEnd == -1
                        ? propertyName.substring(tokenStart)
                        : propertyName.substring(tokenStart, tokenEnd);
            })
            .collect(Collectors.toSet());
}

/**
* Creates an instance of NiFiProperties. This should likely not be called
* by any classes outside of the NiFi framework but can be useful by the
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -374,4 +374,100 @@ public void testTlsConfigurationIsNotPresentWithNoProperties() {

assertFalse(properties.isTlsConfigurationPresent());
}

@Test
public void testGetPropertiesWithPrefixWithoutDot() {
    // Arrange: properties loaded from the test resource
    final NiFiProperties properties = loadNiFiProperties("/NiFiProperties/conf/nifi.properties", null);

    // Act: the prefix has no trailing dot
    final Map<String, String> matches = properties.getPropertiesWithPrefix("nifi.web.http");

    // Assert: both the "http." and the "https." keys are expected to match
    Assert.assertEquals(4, matches.size());
    Assert.assertTrue(matches.containsKey("nifi.web.http.host"));
    Assert.assertTrue(matches.containsKey("nifi.web.https.host"));
}

@Test
public void testGetPropertiesWithPrefixWithDot() {
    // Arrange: properties loaded from the test resource
    final NiFiProperties properties = loadNiFiProperties("/NiFiProperties/conf/nifi.properties", null);

    // Act: the trailing dot narrows the match to the "http." keys
    final Map<String, String> matches = properties.getPropertiesWithPrefix("nifi.web.http.");

    // Assert: the "https." keys must not be part of the result
    Assert.assertEquals(2, matches.size());
    Assert.assertTrue(matches.containsKey("nifi.web.http.host"));
    Assert.assertFalse(matches.containsKey("nifi.web.https.host"));
}

@Test
public void testGetPropertiesWithPrefixWhenNoResult() {
    // Arrange: properties loaded from the test resource
    final NiFiProperties properties = loadNiFiProperties("/NiFiProperties/conf/nifi.properties", null);

    // Act: query with a prefix that no property key is expected to start with
    final Map<String, String> matches = properties.getPropertiesWithPrefix("invalid.property");

    // Assert: nothing matches
    Assert.assertTrue(matches.isEmpty());
}

@Test
public void testGetDirectSubsequentTokensWithoutDot() {
    // Arrange: properties loaded from the test resource
    final NiFiProperties properties = loadNiFiProperties("/NiFiProperties/conf/nifi.properties", null);

    // Act: the prefix has no trailing dot
    final Set<String> tokens = properties.getDirectSubsequentTokens("nifi.web.http");

    // Assert: only the tokens directly under "nifi.web.http." are returned
    Assert.assertEquals(2, tokens.size());
    Assert.assertTrue(tokens.contains("host"));
    Assert.assertTrue(tokens.contains("port"));
}

@Test
public void testGetDirectSubsequentTokensWithDot() {
    // Arrange: properties loaded from the test resource
    final NiFiProperties properties = loadNiFiProperties("/NiFiProperties/conf/nifi.properties", null);

    // Act: the prefix already ends with a dot
    final Set<String> tokens = properties.getDirectSubsequentTokens("nifi.web.http.");

    // Assert: same outcome as for the prefix without the trailing dot
    Assert.assertEquals(2, tokens.size());
    Assert.assertTrue(tokens.contains("host"));
    Assert.assertTrue(tokens.contains("port"));
}

@Test
public void testGetDirectSubsequentTokensWithNonExistingToken() {
    // Arrange: properties loaded from the test resource
    final NiFiProperties properties = loadNiFiProperties("/NiFiProperties/conf/nifi.properties", null);

    // Act: query with a prefix that no property key is expected to start with
    final Set<String> tokens = properties.getDirectSubsequentTokens("lorem.ipsum");

    // Assert: there are no tokens to return
    Assert.assertTrue(tokens.isEmpty());
}

@Test
public void testGetDirectSubsequentTokensWhenMoreTokensAfterward() {
    // Arrange: properties loaded from the test resource
    final NiFiProperties properties = loadNiFiProperties("/NiFiProperties/conf/nifi.properties", null);

    // Act: the keys under this prefix carry further tokens after the direct one
    final Set<String> tokens = properties.getDirectSubsequentTokens("nifi.web");

    // Assert: only the first token after the prefix is reported for each key
    Assert.assertEquals(4, tokens.size());
    Assert.assertTrue(tokens.contains("http"));
    Assert.assertTrue(tokens.contains("https"));
    Assert.assertTrue(tokens.contains("war"));
    Assert.assertTrue(tokens.contains("jetty"));
}
}
39 changes: 39 additions & 0 deletions nifi-docs/src/main/asciidoc/administration-guide.adoc
Original file line number Diff line number Diff line change
Expand Up @@ -4012,3 +4012,42 @@ Now, we must place our custom processor nar in the configured directory. The con
Ensure that the file has appropriate permissions for the nifi user and group.

Refresh the browser page and the custom processor should now be available when adding a new Processor to your flow.

=== NAR Providers

NiFi supports fetching NAR files for the autoloading feature from external sources. This can be achieved by using NAR Providers. A NAR Provider serves as a connector between an external data store
and NiFi.

When configured, a NAR Provider polls the external source for available NAR files and offers them to the framework. The framework then fetches new NAR files and copies them to
the `nifi.nar.library.autoload.directory` for autoloading.

A NAR Provider can be configured by adding the `nifi.nar.library.provider.<providerName>.implementation` property, with the name of the implementation class as its value. Some implementations might need
further properties. These are defined by the implementation and must be prefixed with `nifi.nar.library.provider.<providerName>.`.

The `<providerName>` is arbitrary and serves to correlate multiple properties together for a single provider. Multiple providers might be set, each with a different `<providerName>`. Currently, NiFi supports an HDFS-based NAR provider.

==== HDFS NAR Provider

This implementation is capable of downloading NAR files from an HDFS file system.

The value of the `nifi.nar.library.provider.<providerName>.implementation` property must be `org.apache.nifi.nar.hadoop.HDFSNarProvider`. The following further properties are defined by the provider:

[options="header"]
|===
| Name | Description
| resources | List of HDFS resources, separated by comma.
| source.directory | The source directory of NAR files within HDFS. Note: the provider does not check for files recursively.
| kerberos.principal | Optional. Kerberos principal to authenticate as.
| kerberos.keytab | Optional. Kerberos keytab associated with the principal.
| kerberos.password | Optional. Kerberos password associated with the principal.
|===

Example configuration:

nifi.nar.library.provider.hdfs1.implementation=org.apache.nifi.nar.hadoop.HDFSNarProvider
nifi.nar.library.provider.hdfs1.resources=/etc/hadoop/core-site.xml
nifi.nar.library.provider.hdfs1.source.directory=/customNars

nifi.nar.library.provider.hdfs2.implementation=org.apache.nifi.nar.hadoop.HDFSNarProvider
nifi.nar.library.provider.hdfs2.resources=/etc/hadoop/core-site.xml
nifi.nar.library.provider.hdfs2.source.directory=/other/dir/for/customNars
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,6 @@
import org.apache.nifi.hadoop.KerberosProperties;
import org.apache.nifi.hadoop.SecurityUtil;
import org.apache.nifi.kerberos.KerberosCredentialsService;
import org.apache.nifi.logging.ComponentLog;
import org.apache.nifi.processor.AbstractProcessor;
import org.apache.nifi.processor.ProcessContext;
import org.apache.nifi.processor.ProcessorInitializationContext;
Expand All @@ -51,7 +50,6 @@
import javax.security.auth.login.LoginException;
import java.io.File;
import java.io.IOException;
import java.lang.ref.WeakReference;
import java.lang.reflect.Field;
import java.net.InetSocketAddress;
import java.net.Socket;
Expand All @@ -62,8 +60,6 @@
import java.util.Collection;
import java.util.Collections;
import java.util.List;
import java.util.Map;
import java.util.WeakHashMap;
import java.util.concurrent.atomic.AtomicReference;
import java.util.regex.Pattern;

Expand Down Expand Up @@ -592,36 +588,6 @@ private boolean isFileSystemAccessDenied(final URI fileSystemUri) {
return accessDenied;
}

/**
 * Immutable holder grouping a Hadoop configuration, file system, user group information and Kerberos user,
 * each exposed through its own getter.
 */
protected static class HdfsResources {
    private final Configuration configuration;
    private final FileSystem fileSystem;
    private final UserGroupInformation userGroupInformation;
    private final KerberosUser kerberosUser;

    /**
     * Stores the given references as-is; no validation or copying is performed.
     *
     * @param configuration the configuration to hold
     * @param fileSystem the file system to hold
     * @param userGroupInformation the user group information to hold
     * @param kerberosUser the Kerberos user to hold
     */
    public HdfsResources(final Configuration configuration,
                         final FileSystem fileSystem,
                         final UserGroupInformation userGroupInformation,
                         final KerberosUser kerberosUser) {
        this.configuration = configuration;
        this.fileSystem = fileSystem;
        this.userGroupInformation = userGroupInformation;
        this.kerberosUser = kerberosUser;
    }

    /** @return the configuration passed to the constructor */
    public Configuration getConfiguration() {
        return this.configuration;
    }

    /** @return the file system passed to the constructor */
    public FileSystem getFileSystem() {
        return this.fileSystem;
    }

    /** @return the user group information passed to the constructor */
    public UserGroupInformation getUserGroupInformation() {
        return this.userGroupInformation;
    }

    /** @return the Kerberos user passed to the constructor */
    public KerberosUser getKerberosUser() {
        return this.kerberosUser;
    }
}

static protected class ValidationResources {
private final ResourceReferences configResources;
private final Configuration configuration;
Expand All @@ -640,57 +606,4 @@ public Configuration getConfiguration() {
}
}

/**
 * Hadoop Configuration extension that avoids caching classes which could not be found. Users may add
 * additional JARs to the classpath, and they should not have to restart the JVM in order to load something
 * that was previously missing but might be available now.
 *
 * Refer to the original getClassByNameOrNull of Configuration.
 */
static class ExtendedConfiguration extends Configuration {

    private final ComponentLog logger;
    // Successfully loaded classes only, kept per class loader and held weakly.
    private final Map<ClassLoader, Map<String, WeakReference<Class<?>>>> CACHE_CLASSES = new WeakHashMap<>();

    public ExtendedConfiguration(final ComponentLog logger) {
        this.logger = logger;
    }

    /**
     * Looks up the class with the given name using the class loader of this configuration.
     *
     * @param name the name of the class to load
     * @return the class, or null when it cannot be found; a failed lookup is logged and never cached
     */
    @Override
    public Class<?> getClassByNameOrNull(String name) {
        final ClassLoader loader = getClassLoader();

        // Fetch (or lazily create) the cache belonging to the current class loader.
        final Map<String, WeakReference<Class<?>>> perLoaderCache;
        synchronized (CACHE_CLASSES) {
            perLoaderCache = CACHE_CLASSES.computeIfAbsent(loader,
                    ignored -> Collections.synchronizedMap(new WeakHashMap<String, WeakReference<Class<?>>>()));
        }

        final WeakReference<Class<?>> cachedRef = perLoaderCache.get(name);
        final Class<?> cached = cachedRef == null ? null : cachedRef.get();
        if (cached != null) {
            // cache hit
            return cached;
        }

        final Class<?> loaded;
        try {
            loaded = Class.forName(name, true, loader);
        } catch (ClassNotFoundException e) {
            logger.error(e.getMessage(), e);
            return null;
        }

        // two putters can race here, but they'll put the same class
        perLoaderCache.put(name, new WeakReference<>(loaded));
        return loaded;
    }

}

}
Loading

0 comments on commit 51aae5b

Please sign in to comment.