[paypal#308] Introduce AirConcurrentMap as possible backing HashMap

pjeli · Apr 6, 2022 · bba7c56 · bba7c56
1 parent 2ac9f13
commit bba7c56
Show file tree

Hide file tree

Showing 6 changed files with 74 additions and 15 deletions.
diff --git a/build.gradle b/build.gradle
@@ -151,6 +151,7 @@ configurations {
 }
 
 dependencies {
+    outboundDep files('lib/airconcurrentmap.jar')
     outboundDep group: 'org.pac4j', name: 'spark-pac4j', version: '1.2.3'
     outboundDep group: 'org.pac4j', name: 'pac4j-ldap', version: '1.9.4'
     outboundDep group: 'org.pac4j', name: 'pac4j-jwt', version: '1.9.4'

diff --git a/docs/Getting_Started/How_To_Configure.md b/docs/Getting_Started/How_To_Configure.md
@@ -51,7 +51,7 @@ Definitions of configuration NNA-specific properties:
 * `nna.localonly.users=<comma-seperated list of username:password pairs>` - Local-only accounts; recommended for any applications that intend to use NNA API.
 * `nna.query.engine.impl=<string>` - The full canonical class name of the QueryEngine implementation to use. Current existing implementations are `org.apache.hadoop.hdfs.server.namenode.JavaStreamQueryEngine` (recommended and the default). If you wish to create your own you may change this property to load it.
 * `nna.inode.collection.impl=<string>` - The full canonical class name of the INodeFilterer implementation to use. Current existing implementations are `org.apache.hadoop.hdfs.server.namenode.ConcurrentHashMapINodeCollection` (recommended and the default). If you wish to create your own you may change this property to load it. 
-Other options are: `EclipseINodeCollection` and `NonBlockingHashMapINodeCollection`. You may see less overall memory usage by using one of these other implementations.
+Other options are: `EclipseINodeCollection`, `NonBlockingHashMapINodeCollection`, and `AirConcurrentMapINodeCollection`. You may see less overall memory usage by using one of these other implementations. The AirConcurrentMap is experimental at the moment but may provide a performance increase.
 
 ** If you have a `/usr/local/nn-analytics/config/security.properties` file please rename it to `/usr/local/nn-analytics/config/application.properties`. The `security.properties` file is now deprecated. Eventually the new `application.properties` file will also be moved to an XML file in the style of other Hadoop ecosystem configurations.
 

diff --git a/lib/airconcurrentmap.jar b/lib/airconcurrentmap.jar
diff --git a/src/main/java/org/apache/hadoop/hdfs/server/namenode/AirConcurrentMapINodeCollection.java b/src/main/java/org/apache/hadoop/hdfs/server/namenode/AirConcurrentMapINodeCollection.java
@@ -0,0 +1,55 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.hdfs.server.namenode;
+
+import com.infinitydb.map.air.AirConcurrentMap;
+import java.util.Comparator;
+import java.util.Map;
+import java.util.stream.StreamSupport;
+import org.apache.hadoop.util.GSet;
+
+/**
+ * Filters the INode GSet into java.util.ConcurrentHashMap sets. This has great performance in
+ * memory space vs lookup speed.
+ */
+public class AirConcurrentMapINodeCollection implements INodeFilterer {
+
+  @Override
+  public Map<INode, INodeWithAdditionalFields> filterFiles(
+      GSet<INode, INodeWithAdditionalFields> gset) {
+    AirConcurrentMap<INode, INodeWithAdditionalFields> acmMap =
+        new AirConcurrentMap<>(Comparator.comparing(INode::getId));
+    StreamSupport.stream(gset.spliterator(), true)
+        .filter(INode::isFile)
+        .forEach(node -> acmMap.put(node, node));
+    return acmMap;
+  }
+
+  @Override
+  public Map<INode, INodeWithAdditionalFields> filterDirs(
+      GSet<INode, INodeWithAdditionalFields> gset) {
+    AirConcurrentMap<INode, INodeWithAdditionalFields> acmMap =
+        new AirConcurrentMap<>(Comparator.comparing(INode::getId));
+    StreamSupport.stream(gset.spliterator(), true)
+        .filter(INode::isDirectory)
+        .forEach(node -> acmMap.put(node, node));
+    return acmMap;
+  }
+}
diff --git a/src/test/java/org/apache/hadoop/hdfs/server/namenode/analytics/BenchmarkGSetFiltering.java b/src/test/java/org/apache/hadoop/hdfs/server/namenode/analytics/BenchmarkGSetFiltering.java
@@ -21,12 +21,7 @@
 
 import java.util.Map;
 import java.util.concurrent.TimeUnit;
-import org.apache.hadoop.hdfs.server.namenode.ConcurrentHashMapINodeCollection;
-import org.apache.hadoop.hdfs.server.namenode.EclipseINodeCollection;
-import org.apache.hadoop.hdfs.server.namenode.GSetGenerator;
-import org.apache.hadoop.hdfs.server.namenode.INode;
-import org.apache.hadoop.hdfs.server.namenode.INodeWithAdditionalFields;
-import org.apache.hadoop.hdfs.server.namenode.NonBlockingHashMapINodeCollection;
+import org.apache.hadoop.hdfs.server.namenode.*;
 import org.apache.hadoop.util.GSet;
 import org.openjdk.jmh.annotations.Benchmark;
 import org.openjdk.jmh.annotations.BenchmarkMode;
@@ -47,7 +42,7 @@
 
 @State(Scope.Benchmark)
 @BenchmarkMode(Mode.AverageTime)
-@OutputTimeUnit(TimeUnit.SECONDS)
+@OutputTimeUnit(TimeUnit.MILLISECONDS)
 @Fork(1)
 @Warmup(iterations = 5, time = 1)
 @Measurement(iterations = 5, time = 1)
@@ -71,33 +66,34 @@ public static void main(String[] args) throws RunnerException {
   @Setup(Level.Trial)
   public void doSetup() {
     GSetGenerator gSetGenerator = new GSetGenerator();
-    gset = gSetGenerator.getGSet((short) 3, 15, 500);
+    gset = gSetGenerator.getGSet((short) 3, 20, 500);
   }
 
   @Benchmark
   public void benchmarkConcurrentJavaFiltering() {
     ConcurrentHashMapINodeCollection concurrent = new ConcurrentHashMapINodeCollection();
     Map<INode, INodeWithAdditionalFields> files = concurrent.filterFiles(gset);
     Map<INode, INodeWithAdditionalFields> dirs = concurrent.filterDirs(gset);
-    MemoryProfiler.keepReference(files);
-    MemoryProfiler.keepReference(dirs);
   }
 
   @Benchmark
   public void benchmarkNonBlockingHashMapFiltering() {
     NonBlockingHashMapINodeCollection concurrent = new NonBlockingHashMapINodeCollection();
     Map<INode, INodeWithAdditionalFields> files = concurrent.filterFiles(gset);
     Map<INode, INodeWithAdditionalFields> dirs = concurrent.filterDirs(gset);
-    MemoryProfiler.keepReference(files);
-    MemoryProfiler.keepReference(dirs);
   }
 
   @Benchmark
   public void benchmarkEclipseConcurrentFiltering() {
     EclipseINodeCollection concurrent = new EclipseINodeCollection();
     Map<INode, INodeWithAdditionalFields> files = concurrent.filterFiles(gset);
     Map<INode, INodeWithAdditionalFields> dirs = concurrent.filterDirs(gset);
-    MemoryProfiler.keepReference(files);
-    MemoryProfiler.keepReference(dirs);
+  }
+
+  @Benchmark
+  public void benchmarkAirConcurrentMapFiltering() {
+    AirConcurrentMapINodeCollection concurrent = new AirConcurrentMapINodeCollection();
+    Map<INode, INodeWithAdditionalFields> files = concurrent.filterFiles(gset);
+    Map<INode, INodeWithAdditionalFields> dirs = concurrent.filterDirs(gset);
   }
 }
diff --git a/src/test/java/org/apache/hadoop/hdfs/server/namenode/analytics/TestFiltererLoading.java b/src/test/java/org/apache/hadoop/hdfs/server/namenode/analytics/TestFiltererLoading.java
@@ -20,6 +20,7 @@
 package org.apache.hadoop.hdfs.server.namenode.analytics;
 
 import org.apache.hadoop.hdfs.MiniDFSCluster;
+import org.apache.hadoop.hdfs.server.namenode.AirConcurrentMapINodeCollection;
 import org.apache.hadoop.hdfs.server.namenode.ConcurrentHashMapINodeCollection;
 import org.apache.hadoop.hdfs.server.namenode.EclipseINodeCollection;
 import org.apache.hadoop.hdfs.server.namenode.GSetGenerator;
@@ -85,4 +86,10 @@ public void testLoadEclipseFilterer() throws Exception {
     conf.set("nna.inode.collection.impl", EclipseINodeCollection.class.getCanonicalName());
     nna.init(conf, gset);
   }
+
+  @Test
+  public void testLoadAirConcurrentMapFilterer() throws Exception {
+    conf.set("nna.inode.collection.impl", AirConcurrentMapINodeCollection.class.getCanonicalName());
+    nna.init(conf, gset);
+  }
 }