diff --git a/.asf.yaml b/.asf.yaml
index 91483dfed336c..e1c11790c8bfb 100644
--- a/.asf.yaml
+++ b/.asf.yaml
@@ -22,4 +22,4 @@ notifications:
commits: common-commits@hadoop.apache.org
issues: common-issues@hadoop.apache.org
pullrequests: common-issues@hadoop.apache.org
- jira_options: link label worklog
\ No newline at end of file
+ jira_options: comment link label
\ No newline at end of file
diff --git a/.yetus/excludes.txt b/.yetus/excludes.txt
new file mode 100644
index 0000000000000..0064dc8a3a4bb
--- /dev/null
+++ b/.yetus/excludes.txt
@@ -0,0 +1,17 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+dev-support/docker/Dockerfile_windows_10
diff --git a/BUILDING.txt b/BUILDING.txt
index edf47c5f1337a..b872d7e41944f 100644
--- a/BUILDING.txt
+++ b/BUILDING.txt
@@ -57,7 +57,7 @@ Refer to dev-support/docker/Dockerfile):
* Open JDK 1.8
$ sudo apt-get update
- $ sudo apt-get -y install java-8-openjdk
+ $ sudo apt-get -y install openjdk-8-jdk
* Maven
$ sudo apt-get -y install maven
* Native libraries
@@ -492,39 +492,66 @@ Building on CentOS 8
----------------------------------------------------------------------------------
-Building on Windows
+Building on Windows 10
----------------------------------------------------------------------------------
Requirements:
-* Windows System
+* Windows 10
* JDK 1.8
-* Maven 3.0 or later
-* Boost 1.72
-* Protocol Buffers 3.7.1
-* CMake 3.19 or newer
-* Visual Studio 2010 Professional or Higher
-* Windows SDK 8.1 (if building CPU rate control for the container executor)
-* zlib headers (if building native code bindings for zlib)
+* Maven 3.0 or later (maven.apache.org)
+* Boost 1.72 (boost.org)
+* Protocol Buffers 3.7.1 (https://github.com/protocolbuffers/protobuf/releases)
+* CMake 3.19 or newer (cmake.org)
+* Visual Studio 2019 (visualstudio.com)
+* Windows SDK 8.1 (optional, if building CPU rate control for the container executor. Get this from
+ http://msdn.microsoft.com/en-us/windows/bg162891.aspx)
+* Zlib (zlib.net, if building native code bindings for zlib)
+* Git (preferably, get this from https://git-scm.com/download/win since the package also contains
+ Unix command-line tools that are needed during packaging).
+* Python (python.org, for generation of docs using 'mvn site')
* Internet connection for first build (to fetch all Maven and Hadoop dependencies)
-* Unix command-line tools from GnuWin32: sh, mkdir, rm, cp, tar, gzip. These
- tools must be present on your PATH.
-* Python ( for generation of docs using 'mvn site')
-Unix command-line tools are also included with the Windows Git package which
-can be downloaded from http://git-scm.com/downloads
+----------------------------------------------------------------------------------
-If using Visual Studio, it must be Professional level or higher.
-Do not use Visual Studio Express. It does not support compiling for 64-bit,
-which is problematic if running a 64-bit system.
+Building guidelines:
-The Windows SDK 8.1 is available to download at:
+The Hadoop repository provides a Dockerfile for building Hadoop on Windows 10, located at
+dev-support/docker/Dockerfile_windows_10. It is highly recommended to use it to create the
+Docker image for building Hadoop on Windows 10, since you don't have to install anything other
+than Docker and no additional steps are required to align the environment (paths and so on).
-http://msdn.microsoft.com/en-us/windows/bg162891.aspx
+However, if you prefer not to use Docker, Dockerfile_windows_10 is still immensely useful as a
+guide to all the steps involved in setting up the environment needed to build Hadoop on
+Windows 10.
-Cygwin is not required.
+Building using Docker:
+We first need to build the Docker image for building Hadoop on Windows 10. Run this command from
+the root of the Hadoop repository.
+> docker build -t hadoop-windows-10-builder -f .\dev-support\docker\Dockerfile_windows_10 .\dev-support\docker\
+
+Start the container with the image that we just built.
+> docker run --rm -it hadoop-windows-10-builder
+
+You can now clone the Hadoop repo inside this container and proceed with the build.
+
+NOTE:
+While mounting the locally cloned (on the host filesystem) Hadoop repository into the container
+(using the -v option) may seem natural, we have seen such builds fail because Maven could not
+locate some files. Thus, we suggest cloning the Hadoop repository to a non-mounted folder inside
+the container and proceeding with the build there. When the build completes, you may use the
+"docker cp" command to copy the built Hadoop tar.gz file from the Docker container to the host
+filesystem. If you would still like to mount the Hadoop codebase, a workaround is to copy the
+mounted Hadoop codebase into another folder (one that doesn't point to a mount) in the
+container's filesystem and use that copy for building.
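+
+For illustration, assuming the container ID or name is hadoop-builder, the source was cloned to
+C:\hadoop inside it, and the build produced hadoop-3.4.0-SNAPSHOT.tar.gz (all three values are
+just examples), the copy back to the host could look like:
+> docker cp hadoop-builder:C:\hadoop\hadoop-dist\target\hadoop-3.4.0-SNAPSHOT.tar.gz D:\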
+
+However, we noticed no build issues when the Maven repository from the host filesystem was mounted
+into the container. One may use this to greatly reduce the build time. Assuming that the Maven
+repository is located at D:\Maven\Repository on the host filesystem, the following command mounts
+it onto the default Maven repository location while launching the container.
+> docker run --rm -v D:\Maven\Repository:C:\Users\ContainerAdministrator\.m2\repository -it hadoop-windows-10-builder
-----------------------------------------------------------------------------------
Building:
Keep the source code tree in a short path to avoid running into problems related
@@ -540,6 +567,24 @@ configure the bit-ness of the build, and set several optional components.
Several tests require that the user must have the Create Symbolic Links
privilege.
+To simplify the installation of the Boost, Protocol Buffers, OpenSSL and Zlib dependencies, we can
+use vcpkg (https://github.com/Microsoft/vcpkg.git). Upon cloning the vcpkg repo, check out the
+commit 7ffa425e1db8b0c3edf9c50f2f3a0f25a324541d to get the required versions of the dependencies
+mentioned above.
+> git clone https://github.com/Microsoft/vcpkg.git
+> cd vcpkg
+> git checkout 7ffa425e1db8b0c3edf9c50f2f3a0f25a324541d
+> .\bootstrap-vcpkg.bat
+> .\vcpkg.exe install boost:x64-windows
+> .\vcpkg.exe install protobuf:x64-windows
+> .\vcpkg.exe install openssl:x64-windows
+> .\vcpkg.exe install zlib:x64-windows
+
+Set the following environment variables -
+(Assuming that vcpkg was checked out at C:\vcpkg)
+> set PROTOBUF_HOME=C:\vcpkg\installed\x64-windows
+> set MAVEN_OPTS=-Xmx2048M -Xss128M
+
All Maven goals are the same as described above with the exception that
native code is built by enabling the 'native-win' Maven profile. -Pnative-win
is enabled by default when building on Windows since the native components
@@ -557,6 +602,24 @@ the zlib 1.2.7 source tree.
http://www.zlib.net/
+
+Build command:
+The following commands build all the modules in the Hadoop project and generate the tar.gz file in
+hadoop-dist/target upon a successful build. Run them from an
+"x64 Native Tools Command Prompt for VS 2019", which can be found under "Visual Studio 2019" in the
+Windows start menu. If you're using the Docker image from Dockerfile_windows_10, you'll be
+logged into an "x64 Native Tools Command Prompt for VS 2019" automatically when you start the
+container.
+
+> set classpath=
+> set PROTOBUF_HOME=C:\vcpkg\installed\x64-windows
+> mvn clean package -Dhttps.protocols=TLSv1.2 -DskipTests -DskipDocs -Pnative-win,dist^
+ -Drequire.openssl -Drequire.test.libhadoop -Pyarn-ui -Dshell-executable=C:\Git\bin\bash.exe^
+ -Dtar -Dopenssl.prefix=C:\vcpkg\installed\x64-windows^
+ -Dcmake.prefix.path=C:\vcpkg\installed\x64-windows^
+ -Dwindows.cmake.toolchain.file=C:\vcpkg\scripts\buildsystems\vcpkg.cmake -Dwindows.cmake.build.type=RelWithDebInfo^
+ -Dwindows.build.hdfspp.dll=off -Dwindows.no.sasl=on -Duse.platformToolsetVersion=v142
+
----------------------------------------------------------------------------------
Building distributions:
diff --git a/LICENSE-binary b/LICENSE-binary
index 824dbee29f6a9..988e38fa390d4 100644
--- a/LICENSE-binary
+++ b/LICENSE-binary
@@ -214,24 +214,24 @@ com.aliyun:aliyun-java-sdk-core:3.4.0
com.aliyun:aliyun-java-sdk-ecs:4.2.0
com.aliyun:aliyun-java-sdk-ram:3.0.0
com.aliyun:aliyun-java-sdk-sts:3.0.0
-com.aliyun.oss:aliyun-sdk-oss:3.13.0
-com.amazonaws:aws-java-sdk-bundle:1.11.901
+com.aliyun.oss:aliyun-sdk-oss:3.13.2
+com.amazonaws:aws-java-sdk-bundle:1.12.316
com.cedarsoftware:java-util:1.9.0
com.cedarsoftware:json-io:2.5.1
-com.fasterxml.jackson.core:jackson-annotations:2.9.9
-com.fasterxml.jackson.core:jackson-core:2.9.9
-com.fasterxml.jackson.core:jackson-databind:2.9.9.2
-com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.9.9
-com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.9.9
-com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.9.9
+com.fasterxml.jackson.core:jackson-annotations:2.12.7
+com.fasterxml.jackson.core:jackson-core:2.12.7
+com.fasterxml.jackson.core:jackson-databind:2.12.7.1
+com.fasterxml.jackson.jaxrs:jackson-jaxrs-base:2.12.7
+com.fasterxml.jackson.jaxrs:jackson-jaxrs-json-provider:2.12.7
+com.fasterxml.jackson.module:jackson-module-jaxb-annotations:2.12.7
com.fasterxml.uuid:java-uuid-generator:3.1.4
-com.fasterxml.woodstox:woodstox-core:5.3.0
+com.fasterxml.woodstox:woodstox-core:5.4.0
com.github.davidmoten:rxjava-extras:0.8.0.17
com.github.stephenc.jcip:jcip-annotations:1.0-1
com.google:guice:4.0
com.google:guice-servlet:4.0
com.google.api.grpc:proto-google-common-protos:1.0.0
-com.google.code.gson:2.2.4
+com.google.code.gson:2.9.0
com.google.errorprone:error_prone_annotations:2.2.0
com.google.j2objc:j2objc-annotations:1.1
com.google.json-simple:json-simple:1.1.1
@@ -241,18 +241,17 @@ com.google.guava:guava:27.0-jre
com.google.guava:listenablefuture:9999.0-empty-to-avoid-conflict-with-guava
com.microsoft.azure:azure-storage:7.0.0
com.nimbusds:nimbus-jose-jwt:9.8.1
-com.squareup.okhttp:okhttp:2.7.5
-com.squareup.okio:okio:1.6.0
+com.squareup.okhttp3:okhttp:4.10.0
+com.squareup.okio:okio:3.2.0
com.zaxxer:HikariCP:4.0.3
-commons-beanutils:commons-beanutils:1.9.3
+commons-beanutils:commons-beanutils:1.9.4
commons-cli:commons-cli:1.2
commons-codec:commons-codec:1.11
commons-collections:commons-collections:3.2.2
commons-daemon:commons-daemon:1.0.13
commons-io:commons-io:2.8.0
-commons-lang:commons-lang:2.6
commons-logging:commons-logging:1.1.3
-commons-net:commons-net:3.6
+commons-net:commons-net:3.8.0
de.ruedigermoeller:fst:2.50
io.grpc:grpc-api:1.26.0
io.grpc:grpc-context:1.26.0
@@ -262,17 +261,36 @@ io.grpc:grpc-protobuf:1.26.0
io.grpc:grpc-protobuf-lite:1.26.0
io.grpc:grpc-stub:1.26.0
io.netty:netty:3.10.6.Final
-io.netty:netty-all:4.1.42.Final
-io.netty:netty-buffer:4.1.27.Final
-io.netty:netty-codec:4.1.27.Final
-io.netty:netty-codec-http:4.1.27.Final
-io.netty:netty-codec-http2:4.1.27.Final
-io.netty:netty-codec-socks:4.1.27.Final
-io.netty:netty-common:4.1.27.Final
-io.netty:netty-handler:4.1.27.Final
-io.netty:netty-handler-proxy:4.1.27.Final
-io.netty:netty-resolver:4.1.27.Final
-io.netty:netty-transport:4.1.27.Final
+io.netty:netty-all:4.1.77.Final
+io.netty:netty-buffer:4.1.77.Final
+io.netty:netty-codec:4.1.77.Final
+io.netty:netty-codec-dns:4.1.77.Final
+io.netty:netty-codec-haproxy:4.1.77.Final
+io.netty:netty-codec-http:4.1.77.Final
+io.netty:netty-codec-http2:4.1.77.Final
+io.netty:netty-codec-memcache:4.1.77.Final
+io.netty:netty-codec-mqtt:4.1.77.Final
+io.netty:netty-codec-redis:4.1.77.Final
+io.netty:netty-codec-smtp:4.1.77.Final
+io.netty:netty-codec-socks:4.1.77.Final
+io.netty:netty-codec-stomp:4.1.77.Final
+io.netty:netty-codec-xml:4.1.77.Final
+io.netty:netty-common:4.1.77.Final
+io.netty:netty-handler:4.1.77.Final
+io.netty:netty-handler-proxy:4.1.77.Final
+io.netty:netty-resolver:4.1.77.Final
+io.netty:netty-resolver-dns:4.1.77.Final
+io.netty:netty-transport:4.1.77.Final
+io.netty:netty-transport-rxtx:4.1.77.Final
+io.netty:netty-transport-sctp:4.1.77.Final
+io.netty:netty-transport-udt:4.1.77.Final
+io.netty:netty-transport-classes-epoll:4.1.77.Final
+io.netty:netty-transport-native-unix-common:4.1.77.Final
+io.netty:netty-transport-classes-kqueue:4.1.77.Final
+io.netty:netty-resolver-dns-classes-macos:4.1.77.Final
+io.netty:netty-transport-native-epoll:4.1.77.Final
+io.netty:netty-transport-native-kqueue:4.1.77.Final
+io.netty:netty-resolver-dns-native-macos:4.1.77.Final
io.opencensus:opencensus-api:0.12.3
io.opencensus:opencensus-contrib-grpc-metrics:0.12.3
io.reactivex:rxjava:1.3.8
@@ -284,70 +302,67 @@ log4j:log4j:1.2.17
net.java.dev.jna:jna:5.2.0
net.minidev:accessors-smart:1.2
net.minidev:json-smart:2.4.7
-org.apache.avro:avro:1.7.7
+org.apache.avro:avro:1.9.2
org.apache.commons:commons-collections4:4.2
org.apache.commons:commons-compress:1.21
-org.apache.commons:commons-configuration2:2.1.1
-org.apache.commons:commons-csv:1.0
+org.apache.commons:commons-configuration2:2.8.0
+org.apache.commons:commons-csv:1.9.0
org.apache.commons:commons-digester:1.8.1
-org.apache.commons:commons-lang3:3.7
-org.apache.commons:commons-math3:3.1.1
-org.apache.commons:commons-text:1.4
+org.apache.commons:commons-lang3:3.12.0
+org.apache.commons:commons-math3:3.6.1
+org.apache.commons:commons-text:1.10.0
org.apache.commons:commons-validator:1.6
org.apache.curator:curator-client:5.2.0
org.apache.curator:curator-framework:5.2.0
org.apache.curator:curator-recipes:5.2.0
org.apache.geronimo.specs:geronimo-jcache_1.0_spec:1.0-alpha-1
-org.apache.hbase:hbase-annotations:1.4.8
-org.apache.hbase:hbase-client:1.4.8
-org.apache.hbase:hbase-common:1.4.8
-org.apache.hbase:hbase-protocol:1.4.8
+org.apache.hbase:hbase-annotations:1.7.1
+org.apache.hbase:hbase-client:1.7.1
+org.apache.hbase:hbase-common:1.7.1
+org.apache.hbase:hbase-protocol:1.7.1
org.apache.htrace:htrace-core:3.1.0-incubating
org.apache.htrace:htrace-core4:4.1.0-incubating
org.apache.httpcomponents:httpclient:4.5.6
org.apache.httpcomponents:httpcore:4.4.10
org.apache.kafka:kafka-clients:2.8.1
-org.apache.kerby:kerb-admin:1.0.1
-org.apache.kerby:kerb-client:1.0.1
-org.apache.kerby:kerb-common:1.0.1
-org.apache.kerby:kerb-core:1.0.1
-org.apache.kerby:kerb-crypto:1.0.1
-org.apache.kerby:kerb-identity:1.0.1
-org.apache.kerby:kerb-server:1.0.1
-org.apache.kerby:kerb-simplekdc:1.0.1
-org.apache.kerby:kerb-util:1.0.1
-org.apache.kerby:kerby-asn1:1.0.1
-org.apache.kerby:kerby-config:1.0.1
-org.apache.kerby:kerby-pkix:1.0.1
-org.apache.kerby:kerby-util:1.0.1
-org.apache.kerby:kerby-xdr:1.0.1
-org.apache.kerby:token-provider:1.0.1
+org.apache.kerby:kerb-admin:2.0.2
+org.apache.kerby:kerb-client:2.0.2
+org.apache.kerby:kerb-common:2.0.2
+org.apache.kerby:kerb-core:2.0.2
+org.apache.kerby:kerb-crypto:2.0.2
+org.apache.kerby:kerb-identity:2.0.2
+org.apache.kerby:kerb-server:2.0.2
+org.apache.kerby:kerb-simplekdc:2.0.2
+org.apache.kerby:kerb-util:2.0.2
+org.apache.kerby:kerby-asn1:2.0.2
+org.apache.kerby:kerby-config:2.0.2
+org.apache.kerby:kerby-pkix:2.0.2
+org.apache.kerby:kerby-util:2.0.2
+org.apache.kerby:kerby-xdr:2.0.2
+org.apache.kerby:token-provider:2.0.2
+org.apache.solr:solr-solrj:8.8.2
org.apache.yetus:audience-annotations:0.5.0
org.apache.zookeeper:zookeeper:3.6.3
-org.codehaus.jackson:jackson-core-asl:1.9.13
-org.codehaus.jackson:jackson-jaxrs:1.9.13
-org.codehaus.jackson:jackson-mapper-asl:1.9.13
-org.codehaus.jackson:jackson-xc:1.9.13
-org.codehaus.jettison:jettison:1.1
-org.eclipse.jetty:jetty-annotations:9.3.27.v20190418
-org.eclipse.jetty:jetty-http:9.3.27.v20190418
-org.eclipse.jetty:jetty-io:9.3.27.v20190418
-org.eclipse.jetty:jetty-jndi:9.3.27.v20190418
-org.eclipse.jetty:jetty-plus:9.3.27.v20190418
-org.eclipse.jetty:jetty-security:9.3.27.v20190418
-org.eclipse.jetty:jetty-server:9.3.27.v20190418
-org.eclipse.jetty:jetty-servlet:9.3.27.v20190418
-org.eclipse.jetty:jetty-util:9.3.27.v20190418
-org.eclipse.jetty:jetty-util-ajax:9.3.27.v20190418
-org.eclipse.jetty:jetty-webapp:9.3.27.v20190418
-org.eclipse.jetty:jetty-xml:9.3.27.v20190418
-org.eclipse.jetty.websocket:javax-websocket-client-impl:9.3.27.v20190418
-org.eclipse.jetty.websocket:javax-websocket-server-impl:9.3.27.v20190418
+org.codehaus.jettison:jettison:1.5.1
+org.eclipse.jetty:jetty-annotations:9.4.48.v20220622
+org.eclipse.jetty:jetty-http:9.4.48.v20220622
+org.eclipse.jetty:jetty-io:9.4.48.v20220622
+org.eclipse.jetty:jetty-jndi:9.4.48.v20220622
+org.eclipse.jetty:jetty-plus:9.4.48.v20220622
+org.eclipse.jetty:jetty-security:9.4.48.v20220622
+org.eclipse.jetty:jetty-server:9.4.48.v20220622
+org.eclipse.jetty:jetty-servlet:9.4.48.v20220622
+org.eclipse.jetty:jetty-util:9.4.48.v20220622
+org.eclipse.jetty:jetty-util-ajax:9.4.48.v20220622
+org.eclipse.jetty:jetty-webapp:9.4.48.v20220622
+org.eclipse.jetty:jetty-xml:9.4.48.v20220622
+org.eclipse.jetty.websocket:javax-websocket-client-impl:9.4.48.v20220622
+org.eclipse.jetty.websocket:javax-websocket-server-impl:9.4.48.v20220622
org.ehcache:ehcache:3.3.1
org.lz4:lz4-java:1.7.1
org.objenesis:objenesis:2.6
org.xerial.snappy:snappy-java:1.0.5
-org.yaml:snakeyaml:1.16:
+org.yaml:snakeyaml:1.33
org.wildfly.openssl:wildfly-openssl:1.0.7.Final
@@ -405,14 +420,14 @@ hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.css
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-full-2.0.0.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-helpers-1.1.1.min.js
-hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.5.1.min.js
+hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.6.0.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery.dataTables.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/moment.min.js
hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/bootstrap.min.js
hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js
hadoop-tools/hadoop-sls/src/main/html/css/bootstrap.min.css
hadoop-tools/hadoop-sls/src/main/html/css/bootstrap-responsive.min.css
-hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/dt-1.10.18/*
+hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/dt-1.11.5/*
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jquery
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jt/jquery.jstree.js
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/TERMINAL
@@ -420,7 +435,7 @@ hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanage
bootstrap v3.3.6
broccoli-asset-rev v2.4.2
broccoli-funnel v1.0.1
-datatables v1.10.8
+datatables v1.11.5
em-helpers v0.5.13
em-table v0.1.6
ember v2.2.0
@@ -468,8 +483,8 @@ com.microsoft.azure:azure-cosmosdb-gateway:2.4.5
com.microsoft.azure:azure-data-lake-store-sdk:2.3.3
com.microsoft.azure:azure-keyvault-core:1.0.0
com.microsoft.sqlserver:mssql-jdbc:6.2.1.jre7
-org.bouncycastle:bcpkix-jdk15on:1.60
-org.bouncycastle:bcprov-jdk15on:1.60
+org.bouncycastle:bcpkix-jdk15on:1.68
+org.bouncycastle:bcprov-jdk15on:1.68
org.checkerframework:checker-qual:2.5.2
org.codehaus.mojo:animal-sniffer-annotations:1.17
org.jruby.jcodings:jcodings:1.0.13
@@ -484,12 +499,12 @@ org.slf4j:slf4j-log4j12:1.7.25
CDDL 1.1 + GPLv2 with classpath exception
-----------------------------------------
-com.sun.jersey:jersey-client:1.19
-com.sun.jersey:jersey-core:1.19
-com.sun.jersey:jersey-guice:1.19
-com.sun.jersey:jersey-json:1.19
-com.sun.jersey:jersey-server:1.19
-com.sun.jersey:jersey-servlet:1.19
+com.github.pjfanning:jersey-json:1.20
+com.sun.jersey:jersey-client:1.19.4
+com.sun.jersey:jersey-core:1.19.4
+com.sun.jersey:jersey-guice:1.19.4
+com.sun.jersey:jersey-server:1.19.4
+com.sun.jersey:jersey-servlet:1.19.4
com.sun.xml.bind:jaxb-impl:2.2.3-1
javax.annotation:javax.annotation-api:1.3.2
javax.servlet:javax.servlet-api:3.1.0
@@ -508,13 +523,13 @@ junit:junit:4.13.2
HSQL License
------------
-org.hsqldb:hsqldb:2.3.4
+org.hsqldb:hsqldb:2.7.1
JDOM License
------------
-org.jdom:jdom:1.1
+org.jdom:jdom2:2.0.6.1
Public Domain
diff --git a/LICENSE.txt b/LICENSE.txt
index 3c079898b9071..2dfc0b9da47a7 100644
--- a/LICENSE.txt
+++ b/LICENSE.txt
@@ -245,14 +245,14 @@ hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.css
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dataTables.bootstrap.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-full-2.0.0.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/dust-helpers-1.1.1.min.js
-hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.5.1.min.js
+hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery-3.6.0.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/jquery.dataTables.min.js
hadoop-hdfs-project/hadoop-hdfs/src/main/webapps/static/moment.min.js
hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/bootstrap.min.js
hadoop-tools/hadoop-sls/src/main/html/js/thirdparty/jquery.js
hadoop-tools/hadoop-sls/src/main/html/css/bootstrap.min.css
hadoop-tools/hadoop-sls/src/main/html/css/bootstrap-responsive.min.css
-hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/dt-1.10.18/*
+hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/dt-1.11.5/*
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jquery
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-common/src/main/resources/webapps/static/jt/jquery.jstree.js
hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-nodemanager/src/main/resources/TERMINAL
diff --git a/NOTICE-binary b/NOTICE-binary
index 2f8a9241a8d00..b96e052658876 100644
--- a/NOTICE-binary
+++ b/NOTICE-binary
@@ -66,7 +66,7 @@ available from http://www.digip.org/jansson/.
AWS SDK for Java
-Copyright 2010-2014 Amazon.com, Inc. or its affiliates. All Rights Reserved.
+Copyright 2010-2022 Amazon.com, Inc. or its affiliates. All Rights Reserved.
This product includes software developed by
Amazon Technologies, Inc (http://www.amazon.com/).
diff --git a/dev-support/Jenkinsfile b/dev-support/Jenkinsfile
index 7896810d017d5..51225268b653a 100644
--- a/dev-support/Jenkinsfile
+++ b/dev-support/Jenkinsfile
@@ -15,13 +15,10 @@
// specific language governing permissions and limitations
// under the License.
-def getGithubAndJiraCreds() {
+def getGithubCreds() {
return [usernamePassword(credentialsId: 'apache-hadoop-at-github.com',
passwordVariable: 'GITHUB_TOKEN',
- usernameVariable: 'GITHUB_USER'),
- usernamePassword(credentialsId: 'hadoopqa-at-asf-jira',
- passwordVariable: 'JIRA_PASSWORD',
- usernameVariable: 'JIRA_USER')]
+ usernameVariable: 'GITHUB_USER')]
}
// Publish JUnit results only if there are XML files under surefire-reports
@@ -50,7 +47,7 @@ pipeline {
options {
buildDiscarder(logRotator(numToKeepStr: '5'))
- timeout (time: 24, unit: 'HOURS')
+ timeout (time: 48, unit: 'HOURS')
timestamps()
checkoutToSubdirectory('src')
}
@@ -58,7 +55,7 @@ pipeline {
environment {
YETUS='yetus'
// Branch or tag name. Yetus release tags are 'rel/X.Y.Z'
- YETUS_VERSION='f9ba0170a5787a5f4662d3769804fef0226a182f'
+ YETUS_VERSION='rel/0.14.0'
}
parameters {
@@ -130,7 +127,7 @@ pipeline {
}
steps {
- withCredentials(getGithubAndJiraCreds()) {
+ withCredentials(getGithubCreds()) {
sh '''#!/usr/bin/env bash
chmod u+x "${SOURCEDIR}/dev-support/jenkins.sh"
@@ -176,7 +173,7 @@ pipeline {
}
steps {
- withCredentials(getGithubAndJiraCreds()) {
+ withCredentials(getGithubCreds()) {
sh '''#!/usr/bin/env bash
chmod u+x "${SOURCEDIR}/dev-support/jenkins.sh"
@@ -222,7 +219,7 @@ pipeline {
}
steps {
- withCredentials(getGithubAndJiraCreds()) {
+ withCredentials(getGithubCreds()) {
sh '''#!/usr/bin/env bash
chmod u+x "${SOURCEDIR}/dev-support/jenkins.sh"
@@ -265,7 +262,7 @@ pipeline {
}
steps {
- withCredentials(getGithubAndJiraCreds()) {
+ withCredentials(getGithubCreds()) {
sh '''#!/usr/bin/env bash
chmod u+x "${SOURCEDIR}/dev-support/jenkins.sh"
diff --git a/dev-support/bin/create-release b/dev-support/bin/create-release
index 31ae6ee1b0659..693b41c4f3910 100755
--- a/dev-support/bin/create-release
+++ b/dev-support/bin/create-release
@@ -293,6 +293,7 @@ function usage
echo "--security Emergency security release"
echo "--sign Use .gnupg dir to sign the artifacts and jars"
echo "--version=[version] Use an alternative version string"
+ echo "--mvnargs=[args] Extra Maven args to be provided when running mvn commands"
}
function option_parse
@@ -347,6 +348,9 @@ function option_parse
--version=*)
HADOOP_VERSION=${i#*=}
;;
+ --mvnargs=*)
+ MVNEXTRAARGS=${i#*=}
+ ;;
esac
done
@@ -413,6 +417,9 @@ function option_parse
MVN_ARGS=("-Dmaven.repo.local=${MVNCACHE}")
fi
fi
+ if [ -n "$MVNEXTRAARGS" ]; then
+ MVN_ARGS+=("$MVNEXTRAARGS")
+ fi
if [[ "${SECURITYRELEASE}" = true ]]; then
if [[ ! -d "${BASEDIR}/hadoop-common-project/hadoop-common/src/site/markdown/release/${HADOOP_VERSION}" ]]; then
@@ -535,6 +542,10 @@ function makearelease
big_console_header "Cleaning the Source Tree"
+ # Since CVE-2022-24765 in April 2022, git refuses to work in directories
+ # whose owner != the current user, unless explicitly told to trust it.
+ git config --global --add safe.directory /build/source
+
# git clean to clear any remnants from previous build
run "${GIT}" clean -xdf -e /patchprocess
diff --git a/dev-support/bin/yetus-wrapper b/dev-support/bin/yetus-wrapper
index 8532d1749701b..a93833767a201 100755
--- a/dev-support/bin/yetus-wrapper
+++ b/dev-support/bin/yetus-wrapper
@@ -77,7 +77,7 @@ WANTED="$1"
shift
ARGV=("$@")
-HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.13.0}
+HADOOP_YETUS_VERSION=${HADOOP_YETUS_VERSION:-0.14.0}
BIN=$(yetus_abs "${BASH_SOURCE-$0}")
BINDIR=$(dirname "${BIN}")
diff --git a/dev-support/docker/Dockerfile_centos_8 b/dev-support/docker/Dockerfile_centos_8
index 7b82c4997dee6..8f3b008f7ba03 100644
--- a/dev-support/docker/Dockerfile_centos_8
+++ b/dev-support/docker/Dockerfile_centos_8
@@ -30,6 +30,13 @@ COPY pkg-resolver pkg-resolver
RUN chmod a+x pkg-resolver/*.sh pkg-resolver/*.py \
&& chmod a+r pkg-resolver/*.json
+######
+# CentOS 8 has reached its EOL and the packages
+# are no longer available on the mirror.centos.org site.
+# Please see https://www.centos.org/centos-linux-eol/
+######
+RUN pkg-resolver/set-vault-as-baseurl-centos.sh centos:8
+
######
# Install packages from yum
######
diff --git a/dev-support/docker/Dockerfile_debian_10 b/dev-support/docker/Dockerfile_debian_10
index 256f0d5786ab9..ec3de11035cee 100644
--- a/dev-support/docker/Dockerfile_debian_10
+++ b/dev-support/docker/Dockerfile_debian_10
@@ -82,6 +82,7 @@ ENV HADOOP_SKIP_YETUS_VERIFICATION true
####
# Install packages
####
+RUN pkg-resolver/install-cmake.sh debian:10
RUN pkg-resolver/install-spotbugs.sh debian:10
RUN pkg-resolver/install-boost.sh debian:10
RUN pkg-resolver/install-protobuf.sh debian:10
diff --git a/dev-support/docker/Dockerfile_windows_10 b/dev-support/docker/Dockerfile_windows_10
new file mode 100644
index 0000000000000..7a69a2727ae50
--- /dev/null
+++ b/dev-support/docker/Dockerfile_windows_10
@@ -0,0 +1,81 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# Dockerfile for installing the necessary dependencies for building Hadoop.
+# See BUILDING.txt.
+
+FROM mcr.microsoft.com/windows:ltsc2019
+
+# Need to disable the progress bar to speed up the downloads.
+# hadolint ignore=SC2086
+RUN powershell $Global:ProgressPreference = 'SilentlyContinue'
+
+# Restore the default Windows shell for correct batch processing.
+SHELL ["cmd", "/S", "/C"]
+
+# Install Visual Studio 2019 Build Tools.
+RUN curl -SL --output vs_buildtools.exe https://aka.ms/vs/16/release/vs_buildtools.exe \
+ && (start /w vs_buildtools.exe --quiet --wait --norestart --nocache \
+ --installPath "%ProgramFiles(x86)%\Microsoft Visual Studio\2019\BuildTools" \
+ --add Microsoft.VisualStudio.Workload.VCTools \
+ --add Microsoft.VisualStudio.Component.VC.ASAN \
+ --add Microsoft.VisualStudio.Component.VC.Tools.x86.x64 \
+ --add Microsoft.VisualStudio.Component.Windows10SDK.19041 \
+ || IF "%ERRORLEVEL%"=="3010" EXIT 0) \
+ && del /q vs_buildtools.exe
+
+# Install Chocolatey.
+RUN powershell -NoProfile -ExecutionPolicy Bypass -Command "iex ((New-Object System.Net.WebClient).DownloadString('https://chocolatey.org/install.ps1'))"
+RUN setx PATH "%PATH%;%ALLUSERSPROFILE%\chocolatey\bin"
+
+# Install git.
+RUN choco install git.install -y
+RUN powershell Copy-Item -Recurse -Path 'C:\Program Files\Git' -Destination C:\Git
+
+# Install vcpkg.
+# hadolint ignore=DL3003
+RUN powershell git clone https://github.com/microsoft/vcpkg.git \
+ && cd vcpkg \
+ && git checkout 7ffa425e1db8b0c3edf9c50f2f3a0f25a324541d \
+ && .\bootstrap-vcpkg.bat
+RUN powershell .\vcpkg\vcpkg.exe install boost:x64-windows
+RUN powershell .\vcpkg\vcpkg.exe install protobuf:x64-windows
+RUN powershell .\vcpkg\vcpkg.exe install openssl:x64-windows
+RUN powershell .\vcpkg\vcpkg.exe install zlib:x64-windows
+ENV PROTOBUF_HOME "C:\vcpkg\installed\x64-windows"
+
+# Install Azul Java 8 JDK.
+RUN powershell Invoke-WebRequest -URI https://cdn.azul.com/zulu/bin/zulu8.62.0.19-ca-jdk8.0.332-win_x64.zip -OutFile $Env:TEMP\zulu8.62.0.19-ca-jdk8.0.332-win_x64.zip
+RUN powershell Expand-Archive -Path $Env:TEMP\zulu8.62.0.19-ca-jdk8.0.332-win_x64.zip -DestinationPath "C:\Java"
+ENV JAVA_HOME "C:\Java\zulu8.62.0.19-ca-jdk8.0.332-win_x64"
+RUN setx PATH "%PATH%;%JAVA_HOME%\bin"
+
+# Install Apache Maven.
+RUN powershell Invoke-WebRequest -URI https://dlcdn.apache.org/maven/maven-3/3.8.6/binaries/apache-maven-3.8.6-bin.zip -OutFile $Env:TEMP\apache-maven-3.8.6-bin.zip
+RUN powershell Expand-Archive -Path $Env:TEMP\apache-maven-3.8.6-bin.zip -DestinationPath "C:\Maven"
+RUN setx PATH "%PATH%;C:\Maven\apache-maven-3.8.6\bin"
+ENV MAVEN_OPTS '-Xmx2048M -Xss128M'
+
+# Install CMake 3.19.0.
+RUN powershell Invoke-WebRequest -URI https://cmake.org/files/v3.19/cmake-3.19.0-win64-x64.zip -OutFile $Env:TEMP\cmake-3.19.0-win64-x64.zip
+RUN powershell Expand-Archive -Path $Env:TEMP\cmake-3.19.0-win64-x64.zip -DestinationPath "C:\CMake"
+RUN setx PATH "%PATH%;C:\CMake\cmake-3.19.0-win64-x64\bin"
+
+# We get strange Javadoc errors without this.
+RUN setx classpath ""
+
+# Define the entry point for the docker container.
+ENTRYPOINT ["C:\\Program Files (x86)\\Microsoft Visual Studio\\2019\\BuildTools\\VC\\Auxiliary\\Build\\vcvars64.bat", "&&", "cmd.exe"]
diff --git a/dev-support/docker/pkg-resolver/install-maven.sh b/dev-support/docker/pkg-resolver/install-maven.sh
index f9ff961a190f9..d1d0dc97fe5e4 100644
--- a/dev-support/docker/pkg-resolver/install-maven.sh
+++ b/dev-support/docker/pkg-resolver/install-maven.sh
@@ -40,7 +40,7 @@ fi
if [ "$version_to_install" == "3.6.3" ]; then
mkdir -p /opt/maven /tmp/maven &&
- curl -L -s -S https://mirrors.estointernet.in/apache/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz \
+ curl -L -s -S https://dlcdn.apache.org/maven/maven-3/3.6.3/binaries/apache-maven-3.6.3-bin.tar.gz \
-o /tmp/maven/apache-maven-3.6.3-bin.tar.gz &&
tar xzf /tmp/maven/apache-maven-3.6.3-bin.tar.gz --strip-components 1 -C /opt/maven
else
diff --git a/dev-support/docker/pkg-resolver/packages.json b/dev-support/docker/pkg-resolver/packages.json
index afe8a7a32b107..2225517834601 100644
--- a/dev-support/docker/pkg-resolver/packages.json
+++ b/dev-support/docker/pkg-resolver/packages.json
@@ -62,7 +62,6 @@
"centos:8": "clang"
},
"cmake": {
- "debian:10": "cmake",
"ubuntu:focal": "cmake",
"ubuntu:focal::arch64": "cmake"
},
diff --git a/dev-support/docker/pkg-resolver/set-vault-as-baseurl-centos.sh b/dev-support/docker/pkg-resolver/set-vault-as-baseurl-centos.sh
new file mode 100644
index 0000000000000..4be4cd956b15b
--- /dev/null
+++ b/dev-support/docker/pkg-resolver/set-vault-as-baseurl-centos.sh
@@ -0,0 +1,33 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+if [ $# -lt 1 ]; then
+ echo "ERROR: Need at least 1 argument, $# were provided"
+ exit 1
+fi
+
+if [ "$1" == "centos:7" ] || [ "$1" == "centos:8" ]; then
+ cd /etc/yum.repos.d/ || exit &&
+ sed -i 's/mirrorlist/#mirrorlist/g' /etc/yum.repos.d/CentOS-* &&
+ sed -i 's|#baseurl=http://mirror.centos.org|baseurl=http://vault.centos.org|g' /etc/yum.repos.d/CentOS-* &&
+ yum update -y &&
+ cd /root || exit
+else
+ echo "ERROR: Setting the archived baseurl is only supported for centos 7 and 8 environments"
+ exit 1
+fi
diff --git a/dev-support/git-jira-validation/README.md b/dev-support/git-jira-validation/README.md
new file mode 100644
index 0000000000000..308c54228d17c
--- /dev/null
+++ b/dev-support/git-jira-validation/README.md
@@ -0,0 +1,134 @@
+
+
+Apache Hadoop Git/Jira FixVersion validation
+============================================================
+
+Git commits in Apache Hadoop contain a Jira number of the format
+HADOOP-XXXX or HDFS-XXXX or YARN-XXXX or MAPREDUCE-XXXX.
+While creating a release candidate, we also include a changelist,
+which can be identified based on Fixed/Closed Jiras with the correct
+fix versions. However, sometimes we face a few inconsistencies
+between the fixed Jira and the Git commit message.
+
+The git_jira_fix_version_check.py script takes care of
+identifying all git commits whose commit
+messages have any of these issues:
+
+1. The commit is reverted as per the commit message
+2. The commit message does not contain a Jira number in the expected format
+3. The Jira does not have the expected fixVersion
+4. The Jira has the expected fixVersion, but it is not yet resolved
+
+Moreover, this script also finds any resolved Jira with the expected
+fixVersion but without a corresponding commit present.
+
+This should be useful as part of RC preparation.
+
+git_jira_fix_version_check supports python3 and requires
+installation of the jira package:
+
+```
+$ python3 --version
+Python 3.9.7
+
+$ python3 -m venv ./venv
+
+$ ./venv/bin/pip install -r dev-support/git-jira-validation/requirements.txt
+
+$ ./venv/bin/python dev-support/git-jira-validation/git_jira_fix_version_check.py
+
+```
+
+The script also requires the following inputs:
+```
+1. First commit hash to start excluding commits from history:
+ Usually we can provide the latest commit hash from the last tagged release
+ so that the script will only loop through the commits in the git commit
+ history before this commit hash. e.g. for the 3.3.2 release, we can provide
+ the git hash: fa4915fdbbbec434ab41786cb17b82938a613f16
+ because this commit bumps up the hadoop pom versions to 3.3.2:
+ https://github.com/apache/hadoop/commit/fa4915fdbbbec434ab41786cb17b82938a613f16
+ (one way to obtain such a hash is shown in the example after this list)
+
+2. Fix Version:
+ The exact fixVersion that we would like to compare all Jiras' fixVersions
+ with. e.g. for the 3.3.2 release, it should be 3.3.2.
+
+3. JIRA Project Name:
+ The exact, case-sensitive name of the project, e.g. HADOOP / OZONE
+
+4. Path of project's working dir with release branch checked-in:
+ Path of the project from which we want to compare git hashes. The local fork
+ of the project should be up to date with upstream, and the expected release
+ branch should be checked out.
+
+5. Jira server url (default url: https://issues.apache.org/jira):
+ The default server value points to the ASF Jira, but this script can be
+ used outside of the ASF Jira too.
+```
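+
+For the first input above, one way to obtain the commit hash of the last tagged release is to
+resolve that release tag with git. The tag name below is only an illustration; substitute the tag
+(or commit) that marks the previous release in your checkout:
+
+```
+$ git rev-list -n 1 rel/release-3.3.1
+```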
+
+
+Example of script execution:
+```
+JIRA Project Name (e.g HADOOP / OZONE etc): HADOOP
+First commit hash to start excluding commits from history: fa4915fdbbbec434ab41786cb17b82938a613f16
+Fix Version: 3.3.2
+Jira server url (default: https://issues.apache.org/jira):
+Path of project's working dir with release branch checked-in: /Users/vjasani/Documents/src/hadoop-3.3/hadoop
+
+Check git status output and verify expected branch
+
+On branch branch-3.3.2
+Your branch is up to date with 'origin/branch-3.3.2'.
+
+nothing to commit, working tree clean
+
+
+Jira/Git commit message diff starting: ##############################################
+Jira not present with version: 3.3.2. Commit: 8cd8e435fb43a251467ca74fadcb14f21a3e8163 HADOOP-17198. Support S3 Access Points (#3260) (branch-3.3.2) (#3955)
+WARN: Jira not found. Commit: 8af28b7cca5c6020de94e739e5373afc69f399e5 Updated the index as per 3.3.2 release
+WARN: Jira not found. Commit: e42e483d0085aa46543ebcb1196dd155ddb447d0 Make upstream aware of 3.3.1 release
+Commit seems reverted. Commit: 6db1165380cd308fb74c9d17a35c1e57174d1e09 Revert "HDFS-14099. Unknown frame descriptor when decompressing multiple frames (#3836)"
+Commit seems reverted. Commit: 1e3f94fa3c3d4a951d4f7438bc13e6f008f228f4 Revert "HDFS-16333. fix balancer bug when transfer an EC block (#3679)"
+Jira not present with version: 3.3.2. Commit: ce0bc7b473a62a580c1227a4de6b10b64b045d3a HDFS-16344. Improve DirectoryScanner.Stats#toString (#3695)
+Jira not present with version: 3.3.2. Commit: 30f0629d6e6f735c9f4808022f1a1827c5531f75 HDFS-16339. Show the threshold when mover threads quota is exceeded (#3689)
+Jira not present with version: 3.3.2. Commit: e449daccf486219e3050254d667b74f92e8fc476 YARN-11007. Correct words in YARN documents (#3680)
+Commit seems reverted. Commit: 5c189797828e60a3329fd920ecfb99bcbccfd82d Revert "HDFS-16336. Addendum: De-flake TestRollingUpgrade#testRollback (#3686)"
+Jira not present with version: 3.3.2. Commit: 544dffd179ed756bc163e4899e899a05b93d9234 HDFS-16171. De-flake testDecommissionStatus (#3280)
+Jira not present with version: 3.3.2. Commit: c6914b1cb6e4cab8263cd3ae5cc00bc7a8de25de HDFS-16350. Datanode start time should be set after RPC server starts successfully (#3711)
+Jira not present with version: 3.3.2. Commit: 328d3b84dfda9399021ccd1e3b7afd707e98912d HDFS-16336. Addendum: De-flake TestRollingUpgrade#testRollback (#3686)
+Jira not present with version: 3.3.2. Commit: 3ae8d4ccb911c9ababd871824a2fafbb0272c016 HDFS-16336. De-flake TestRollingUpgrade#testRollback (#3686)
+Jira not present with version: 3.3.2. Commit: 15d3448e25c797b7d0d401afdec54683055d4bb5 HADOOP-17975. Fallback to simple auth does not work for a secondary DistributedFileSystem instance. (#3579)
+Jira not present with version: 3.3.2. Commit: dd50261219de71eaa0a1ad28529953e12dfb92e0 YARN-10991. Fix to ignore the grouping "[]" for resourcesStr in parseResourcesString method (#3592)
+Jira not present with version: 3.3.2. Commit: ef462b21bf03b10361d2f9ea7b47d0f7360e517f HDFS-16332. Handle invalid token exception in sasl handshake (#3677)
+WARN: Jira not found. Commit: b55edde7071419410ea5bea4ce6462b980e48f5b Also update hadoop.version to 3.3.2
+...
+...
+...
+Found first commit hash after which git history is redundant. commit: fa4915fdbbbec434ab41786cb17b82938a613f16
+Exiting successfully
+Jira/Git commit message diff completed: ##############################################
+
+Any resolved Jira with fixVersion 3.3.2 but corresponding commit not present
+Starting diff: ##############################################
+HADOOP-18066 is marked resolved with fixVersion 3.3.2 but no corresponding commit found
+HADOOP-17936 is marked resolved with fixVersion 3.3.2 but no corresponding commit found
+Completed diff: ##############################################
+
+
+```
+
diff --git a/dev-support/git-jira-validation/git_jira_fix_version_check.py b/dev-support/git-jira-validation/git_jira_fix_version_check.py
new file mode 100644
index 0000000000000..513cc8e25ffdb
--- /dev/null
+++ b/dev-support/git-jira-validation/git_jira_fix_version_check.py
@@ -0,0 +1,117 @@
+#!/usr/bin/env python3
+############################################################################
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+############################################################################
+"""An application to assist Release Managers with ensuring that histories in
+Git and fixVersions in JIRA are in agreement. See README.md for a detailed
+explanation.
+"""
+
+
+import os
+import re
+import subprocess
+
+from jira import JIRA
+
+jira_project_name = input("JIRA Project Name (e.g HADOOP / OZONE etc): ") \
+ or "HADOOP"
+# Define project_jira_keys with - appended. e.g for HADOOP Jiras,
+# project_jira_keys should include HADOOP-, HDFS-, YARN-, MAPREDUCE-
+project_jira_keys = [jira_project_name + '-']
+if jira_project_name == 'HADOOP':
+ project_jira_keys.append('HDFS-')
+ project_jira_keys.append('YARN-')
+ project_jira_keys.append('MAPREDUCE-')
+
+first_exclude_commit_hash = input("First commit hash to start excluding commits from history: ")
+fix_version = input("Fix Version: ")
+
+jira_server_url = input(
+ "Jira server url (default: https://issues.apache.org/jira): ") \
+ or "https://issues.apache.org/jira"
+
+jira = JIRA(server=jira_server_url)
+
+local_project_dir = input("Path of project's working dir with release branch checked-in: ")
+os.chdir(local_project_dir)
+
+GIT_STATUS_MSG = subprocess.check_output(['git', 'status']).decode("utf-8")
+print('\nCheck git status output and verify expected branch\n')
+print(GIT_STATUS_MSG)
+
+print('\nJira/Git commit message diff starting: ##############################################')
+
+issue_set_from_commit_msg = set()
+
+for commit in subprocess.check_output(['git', 'log', '--pretty=oneline']).decode(
+ "utf-8").splitlines():
+ if commit.startswith(first_exclude_commit_hash):
+ print("Found first commit hash after which git history is redundant. commit: "
+ + first_exclude_commit_hash)
+ print("Exiting successfully")
+ break
+ if re.search('revert', commit, re.IGNORECASE):
+ print("Commit seems reverted. \t\t\t Commit: " + commit)
+ continue
+ ACTUAL_PROJECT_JIRA = None
+ matches = re.findall('|'.join(project_jira_keys), commit)
+ if matches:
+ ACTUAL_PROJECT_JIRA = matches[0]
+ if not ACTUAL_PROJECT_JIRA:
+ print("WARN: Jira not found. \t\t\t Commit: " + commit)
+ continue
+ JIRA_NUM = ''
+ for c in commit.split(ACTUAL_PROJECT_JIRA)[1]:
+ if c.isdigit():
+ JIRA_NUM = JIRA_NUM + c
+ else:
+ break
+ issue = jira.issue(ACTUAL_PROJECT_JIRA + JIRA_NUM)
+ EXPECTED_FIX_VERSION = False
+ for version in issue.fields.fixVersions:
+ if version.name == fix_version:
+ EXPECTED_FIX_VERSION = True
+ break
+ if not EXPECTED_FIX_VERSION:
+ print("Jira not present with version: " + fix_version + ". \t Commit: " + commit)
+ continue
+ if issue.fields.status is None or issue.fields.status.name not in ('Resolved', 'Closed'):
+ print("Jira is not resolved yet? \t\t Commit: " + commit)
+ else:
+ # This means Jira corresponding to current commit message is resolved with expected
+ # fixVersion.
+ # This is no-op by default, if needed, convert to print statement.
+ issue_set_from_commit_msg.add(ACTUAL_PROJECT_JIRA + JIRA_NUM)
+
+print('Jira/Git commit message diff completed: ##############################################')
+
+print('\nAny resolved Jira with fixVersion ' + fix_version
+ + ' but corresponding commit not present')
+print('Starting diff: ##############################################')
+all_issues_with_fix_version = jira.search_issues(
+ 'project=' + jira_project_name + ' and status in (Resolved,Closed) and fixVersion='
+ + fix_version)
+
+for issue in all_issues_with_fix_version:
+ if issue.key not in issue_set_from_commit_msg:
+ print(issue.key + ' is marked resolved with fixVersion ' + fix_version
+ + ' but no corresponding commit found')
+
+print('Completed diff: ##############################################')
diff --git a/dev-support/git-jira-validation/requirements.txt b/dev-support/git-jira-validation/requirements.txt
new file mode 100644
index 0000000000000..ae7535a119fa9
--- /dev/null
+++ b/dev-support/git-jira-validation/requirements.txt
@@ -0,0 +1,18 @@
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+jira==3.1.1
diff --git a/dev-support/hadoop-vote.sh b/dev-support/hadoop-vote.sh
new file mode 100755
index 0000000000000..825e776fabf16
--- /dev/null
+++ b/dev-support/hadoop-vote.sh
@@ -0,0 +1,204 @@
+#!/usr/bin/env bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# This script is useful for performing basic sanity tests on a given
+# Hadoop RC. It checks the checksums, signatures, the Rat check,
+# the build from source, and building the tarball from the source.
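+#
+# An illustrative invocation (the URL and key id below are only examples, taken from the
+# usage text further down) might look like:
+#   ./dev-support/hadoop-vote.sh --source https://dist.apache.org/repos/dist/dev/hadoop/hadoop-X.Y.Z-RC0/ --key 9AD2AE49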
+
+set -e -o pipefail
+
+usage() {
+ SCRIPT=$(basename "${BASH_SOURCE[@]}")
+
+ cat << __EOF
+hadoop-vote. A script for standard vote which verifies the following items
+1. Checksum of sources and binaries
+2. Signature of sources and binaries
+3. Rat check
+4. Built from source
+5. Built tar from source
+
+Usage: ${SCRIPT} -s | --source [-k | --key ] [-f | --keys-file-url ] [-o | --output-dir ] [-D property[=value]] [-P profiles]
+ ${SCRIPT} -h | --help
+
+ -h | --help Show this screen.
+ -s | --source '' A URL pointing to the release candidate sources and binaries
+ e.g. https://dist.apache.org/repos/dist/dev/hadoop/hadoop-RC0/
+ -k | --key '' A signature of the public key, e.g. 9AD2AE49
+ -f | --keys-file-url '' the URL of the key file, default is
+ https://downloads.apache.org/hadoop/common/KEYS
+ -o | --output-dir '' directory which has the stdout and stderr of each verification target
+ -D | list of maven properties to set for the mvn invocations, e.g. <-D hbase.profile=2.0 -D skipTests> Defaults to unset
+ -P | list of maven profiles to set for the build from source, e.g. <-P native -P yarn-ui>
+__EOF
+}
+
+MVN_PROPERTIES=()
+MVN_PROFILES=()
+
+while ((${#})); do
+ case "${1}" in
+ -h | --help )
+ usage; exit 0 ;;
+ -s | --source )
+ SOURCE_URL="${2}"; shift 2 ;;
+ -k | --key )
+ SIGNING_KEY="${2}"; shift 2 ;;
+ -f | --keys-file-url )
+ KEY_FILE_URL="${2}"; shift 2 ;;
+ -o | --output-dir )
+ OUTPUT_DIR="${2}"; shift 2 ;;
+ -D )
+ MVN_PROPERTIES+=("-D ${2}"); shift 2 ;;
+ -P )
+ MVN_PROFILES+=("-P ${2}"); shift 2 ;;
+ * )
+ usage >&2; exit 1 ;;
+ esac
+done
+
+# Source url must be provided
+if [ -z "${SOURCE_URL}" ]; then
+ usage;
+ exit 1
+fi
+
+cat << __EOF
+Although this tool helps verify the Hadoop RC build and unit tests,
+the operator may still consider verifying the following manually:
+1. Verify the API compatibility report
+2. Integration/performance/benchmark tests
+3. Object store specific Integration tests against an endpoint
+4. Verify overall unit test stability from Jenkins builds or locally
+5. Other concerns if any
+__EOF
+
+[[ "${SOURCE_URL}" != */ ]] && SOURCE_URL="${SOURCE_URL}/"
+HADOOP_RC_VERSION=$(tr "/" "\n" <<< "${SOURCE_URL}" | tail -n2)
+HADOOP_VERSION=$(echo "${HADOOP_RC_VERSION}" | sed -e 's/-RC[0-9]//g' | sed -e 's/hadoop-//g')
+JAVA_VERSION=$(java -version 2>&1 | cut -f3 -d' ' | head -n1 | sed -e 's/"//g')
+OUTPUT_DIR="${OUTPUT_DIR:-$(pwd)}"
+
+if [ ! -d "${OUTPUT_DIR}" ]; then
+ echo "Output directory ${OUTPUT_DIR} does not exist, please create it before running this script."
+ exit 1
+fi
+
+OUTPUT_PATH_PREFIX="${OUTPUT_DIR}"/"${HADOOP_RC_VERSION}"
+
+# default value for verification targets, 0 = failed
+SIGNATURE_PASSED=0
+CHECKSUM_PASSED=0
+RAT_CHECK_PASSED=0
+BUILD_FROM_SOURCE_PASSED=0
+BUILD_TAR_FROM_SOURCE_PASSED=0
+
+function download_and_import_keys() {
+ KEY_FILE_URL="${KEY_FILE_URL:-https://downloads.apache.org/hadoop/common/KEYS}"
+ echo "Obtain and import the publisher key(s) from ${KEY_FILE_URL}"
+ # download the keys file into file KEYS
+ wget -O KEYS "${KEY_FILE_URL}"
+ gpg --import KEYS
+ if [ -n "${SIGNING_KEY}" ]; then
+ gpg --list-keys "${SIGNING_KEY}"
+ fi
+}
+
+function download_release_candidate () {
+ # get all files from release candidate repo
+ wget -r -np -N -nH --cut-dirs 4 "${SOURCE_URL}"
+}
+
+function verify_signatures() {
+ rm -f "${OUTPUT_PATH_PREFIX}"_verify_signatures
+ for file in *.tar.gz; do
+ gpg --verify "${file}".asc "${file}" 2>&1 | tee -a "${OUTPUT_PATH_PREFIX}"_verify_signatures && SIGNATURE_PASSED=1 || SIGNATURE_PASSED=0
+ done
+}
+
+function verify_checksums() {
+ rm -f "${OUTPUT_PATH_PREFIX}"_verify_checksums
+ SHA_EXT=$(find . -name "*.sha*" | awk -F '.' '{ print $NF }' | head -n 1)
+ for file in *.tar.gz; do
+ sha512sum --tag "${file}" > "${file}"."${SHA_EXT}".tmp
+ diff "${file}"."${SHA_EXT}".tmp "${file}"."${SHA_EXT}" 2>&1 | tee -a "${OUTPUT_PATH_PREFIX}"_verify_checksums && CHECKSUM_PASSED=1 || CHECKSUM_PASSED=0
+ rm -f "${file}"."${SHA_EXT}".tmp
+ done
+}
+
+function unzip_from_source() {
+ tar -zxvf hadoop-"${HADOOP_VERSION}"-src.tar.gz
+ cd hadoop-"${HADOOP_VERSION}"-src
+}
+
+function rat_test() {
+ rm -f "${OUTPUT_PATH_PREFIX}"_rat_test
+ mvn clean apache-rat:check "${MVN_PROPERTIES[@]}" 2>&1 | tee "${OUTPUT_PATH_PREFIX}"_rat_test && RAT_CHECK_PASSED=1
+}
+
+function build_from_source() {
+ rm -f "${OUTPUT_PATH_PREFIX}"_build_from_source
+ # No unit test run.
+ mvn clean install "${MVN_PROPERTIES[@]}" -DskipTests "${MVN_PROFILES[@]}" 2>&1 | tee "${OUTPUT_PATH_PREFIX}"_build_from_source && BUILD_FROM_SOURCE_PASSED=1
+}
+
+function build_tar_from_source() {
+ rm -f "${OUTPUT_PATH_PREFIX}"_build_tar_from_source
+ # No unit test run.
+ mvn clean package "${MVN_PROPERTIES[@]}" -Pdist -DskipTests -Dtar -Dmaven.javadoc.skip=true 2>&1 | tee "${OUTPUT_PATH_PREFIX}"_build_tar_from_source && BUILD_TAR_FROM_SOURCE_PASSED=1
+}
+
+function execute() {
+ ${1} || print_when_exit
+}
+
+function print_when_exit() {
+ cat << __EOF
+ * Signature: $( ((SIGNATURE_PASSED)) && echo "ok" || echo "failed" )
+ * Checksum : $( ((CHECKSUM_PASSED)) && echo "ok" || echo "failed" )
+ * Rat check (${JAVA_VERSION}): $( ((RAT_CHECK_PASSED)) && echo "ok" || echo "failed" )
+ - mvn clean apache-rat:check ${MVN_PROPERTIES[@]}
+ * Built from source (${JAVA_VERSION}): $( ((BUILD_FROM_SOURCE_PASSED)) && echo "ok" || echo "failed" )
+ - mvn clean install ${MVN_PROPERTIES[@]} -DskipTests ${MVN_PROFILES[@]}
+ * Built tar from source (${JAVA_VERSION}): $( ((BUILD_TAR_FROM_SOURCE_PASSED)) && echo "ok" || echo "failed" )
+ - mvn clean package ${MVN_PROPERTIES[@]} -Pdist -DskipTests -Dtar -Dmaven.javadoc.skip=true
+__EOF
+ if ((CHECKSUM_PASSED)) && ((SIGNATURE_PASSED)) && ((RAT_CHECK_PASSED)) && ((BUILD_FROM_SOURCE_PASSED)) && ((BUILD_TAR_FROM_SOURCE_PASSED)) ; then
+ exit 0
+ fi
+ exit 1
+}
+
+pushd "${OUTPUT_DIR}"
+
+download_and_import_keys
+download_release_candidate
+
+pushd "${HADOOP_RC_VERSION}"
+
+execute verify_signatures
+execute verify_checksums
+execute unzip_from_source
+execute rat_test
+execute build_from_source
+execute build_tar_from_source
+
+popd
+popd
+
+print_when_exit
diff --git a/dev-support/jenkins.sh b/dev-support/jenkins.sh
index 7ead90b5da24d..1bb080d19cabc 100644
--- a/dev-support/jenkins.sh
+++ b/dev-support/jenkins.sh
@@ -149,10 +149,6 @@ function run_ci() {
# enable writing back to Github
YETUS_ARGS+=("--github-token=${GITHUB_TOKEN}")
- # enable writing back to ASF JIRA
- YETUS_ARGS+=("--jira-password=${JIRA_PASSWORD}")
- YETUS_ARGS+=("--jira-user=${JIRA_USER}")
-
# auto-kill any surefire stragglers during unit test runs
YETUS_ARGS+=("--reapermode=kill")
@@ -173,7 +169,7 @@ function run_ci() {
YETUS_ARGS+=("--build-url-artifacts=artifact/out")
# plugins to enable
- YETUS_ARGS+=("--plugins=all")
+ YETUS_ARGS+=("--plugins=all,-jira")
# don't let these tests cause -1s because we aren't really paying that
# much attention to them
diff --git a/hadoop-build-tools/src/main/resources/checkstyle/checkstyle.xml b/hadoop-build-tools/src/main/resources/checkstyle/checkstyle.xml
index 73012dd47a864..ca8d137dd5e49 100644
--- a/hadoop-build-tools/src/main/resources/checkstyle/checkstyle.xml
+++ b/hadoop-build-tools/src/main/resources/checkstyle/checkstyle.xml
@@ -122,9 +122,8 @@
-
-
-
+
+
diff --git a/hadoop-client-modules/hadoop-client-api/pom.xml b/hadoop-client-modules/hadoop-client-api/pom.xml
index d0d62f5277b9b..b4b81011eb517 100644
--- a/hadoop-client-modules/hadoop-client-api/pom.xml
+++ b/hadoop-client-modules/hadoop-client-api/pom.xml
@@ -98,13 +98,6 @@
truetrue
-
-
- org.apache.hadoop
- hadoop-maven-plugins
- ${project.version}
-
- package
@@ -161,6 +154,9 @@
org/xerial/snappy/*org/xerial/snappy/**/*
+
+ org/wildfly/openssl/*
+ org/wildfly/openssl/**/*
@@ -179,6 +175,8 @@
com/sun/security/**/*com/sun/jndi/**/*com/sun/management/**/*
+ com/ibm/security/*
+ com/ibm/security/**/*
@@ -249,8 +247,7 @@
-
-
+ NOTICE.txt
diff --git a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml
index 9d4bce1fddccd..b1c00678406d7 100644
--- a/hadoop-client-modules/hadoop-client-check-invariants/pom.xml
+++ b/hadoop-client-modules/hadoop-client-check-invariants/pom.xml
@@ -56,7 +56,7 @@
org.codehaus.mojoextra-enforcer-rules
- 1.0-beta-3
+ 1.5.1
diff --git a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml
index 635250ec1ae1f..0e576ac6f0666 100644
--- a/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml
+++ b/hadoop-client-modules/hadoop-client-check-test-invariants/pom.xml
@@ -60,7 +60,7 @@
org.codehaus.mojoextra-enforcer-rules
- 1.0-beta-3
+ 1.5.1
diff --git a/hadoop-client-modules/hadoop-client-integration-tests/pom.xml b/hadoop-client-modules/hadoop-client-integration-tests/pom.xml
index 967782163f7e8..ba593ebd1b42d 100644
--- a/hadoop-client-modules/hadoop-client-integration-tests/pom.xml
+++ b/hadoop-client-modules/hadoop-client-integration-tests/pom.xml
@@ -184,6 +184,12 @@
hadoop-hdfstesttest-jar
+
+
+ org.ow2.asm
+ asm-commons
+
+ org.apache.hadoop
diff --git a/hadoop-client-modules/hadoop-client-minicluster/pom.xml b/hadoop-client-modules/hadoop-client-minicluster/pom.xml
index c9ce6f297d22a..208345d5f5a53 100644
--- a/hadoop-client-modules/hadoop-client-minicluster/pom.xml
+++ b/hadoop-client-modules/hadoop-client-minicluster/pom.xml
@@ -332,6 +332,10 @@
org.apache.hadoop.thirdpartyhadoop-shaded-guava
+
+ org.ow2.asm
+ asm-commons
+
- com.sun.jersey
- jersey-core
+ com.sun.jersey
+ jersey-coretrue
@@ -419,29 +423,25 @@
true
- com.sun.jersey
+ com.github.pjfanningjersey-jsontrue
- javax.xml.bind
- jaxb-api
-
-
- org.codehaus.jackson
- jackson-core-asl
+ com.fasterxml.jackson.core
+ jackson-core
- org.codehaus.jackson
- jackson-mapper-asl
+ com.fasterxml.jackson.core
+ jackson-databind
- org.codehaus.jackson
- jackson-jaxrs
+ com.fasterxml.jackson.jaxrs
+ jackson-jaxrs-json-provider
- org.codehaus.jackson
- jackson-xc
+ javax.xml.bind
+ jaxb-api
@@ -451,9 +451,23 @@
true
- com.sun.jersey
- jersey-servlet
+ com.sun.jersey
+ jersey-servlettrue
+
+
+ javax.servlet
+ servlet-api
+
+
+ javax.enterprise
+ cdi-api
+
+
+ ch.qos.cal10n
+ cal10n-api
+
+
@@ -657,13 +671,6 @@
org.apache.maven.pluginsmaven-shade-plugin
-
-
- org.apache.hadoop
- hadoop-maven-plugins
- ${project.version}
-
- package
@@ -736,6 +743,18 @@
testdata/*
+
+ com.fasterxml.jackson.*:*
+
+ META-INF/versions/11/module-info.class
+
+
+
+ com.google.code.gson:gson
+
+ META-INF/versions/9/module-info.class
+
+
@@ -919,6 +938,8 @@
com/sun/security/**/*com/sun/jndi/**/*com/sun/management/**/*
+ com/ibm/security/*
+ com/ibm/security/**/*
@@ -1024,8 +1045,7 @@
-
-
+
diff --git a/hadoop-client-modules/hadoop-client-runtime/pom.xml b/hadoop-client-modules/hadoop-client-runtime/pom.xml
index b6a71e50c41ab..d5185f0fffc41 100644
--- a/hadoop-client-modules/hadoop-client-runtime/pom.xml
+++ b/hadoop-client-modules/hadoop-client-runtime/pom.xml
@@ -128,13 +128,6 @@
org.apache.maven.pluginsmaven-shade-plugin
-
-
- org.apache.hadoop
- hadoop-maven-plugins
- ${project.version}
-
- package
@@ -155,6 +148,7 @@
com.google.code.findbugs:jsr305
+ io.netty:*io.dropwizard.metrics:metrics-coreorg.eclipse.jetty:jetty-servletorg.eclipse.jetty:jetty-security
@@ -163,6 +157,8 @@
org.bouncycastle:*org.xerial.snappy:*
+
+ org.jetbrains.kotlin:*
@@ -242,6 +238,19 @@
google/protobuf/**/*.proto
+
+ com.fasterxml.jackson.*:*
+
+ META-INF/versions/11/module-info.class
+
+
+
+ com.google.code.gson:gson
+
+ META-INF/versions/9/module-info.class
+
+
+
@@ -288,6 +297,8 @@
com/sun/security/**/*com/sun/jndi/**/*com/sun/management/**/*
+ com/ibm/security/*
+ com/ibm/security/**/*
@@ -382,8 +393,7 @@
-->
-
-
+
diff --git a/hadoop-client-modules/hadoop-client/pom.xml b/hadoop-client-modules/hadoop-client/pom.xml
index dced359b286d9..5299c9e8713df 100644
--- a/hadoop-client-modules/hadoop-client/pom.xml
+++ b/hadoop-client-modules/hadoop-client/pom.xml
@@ -66,7 +66,7 @@
jersey-core
- com.sun.jersey
+ com.github.pjfanningjersey-json
@@ -114,6 +114,18 @@
org.eclipse.jettyjetty-server
+
+ org.jetbrains.kotlin
+ kotlin-stdlib
+
+
+ org.jetbrains.kotlin
+ kotlin-stdlib-common
+
+
+ com.squareup.okhttp3
+ okhttp
+ com.sun.jerseyjersey-core
@@ -167,7 +179,7 @@
jersey-core
- com.sun.jersey
+ com.github.pjfanningjersey-json
@@ -218,7 +230,7 @@
jersey-server
- com.sun.jersey
+ com.github.pjfanningjersey-json
@@ -275,7 +287,7 @@
guice-servlet
- com.sun.jersey
+ com.github.pjfanningjersey-json
diff --git a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml
index a8f45a7f3a222..6c8a0916802f2 100644
--- a/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml
+++ b/hadoop-cloud-storage-project/hadoop-cloud-storage/pom.xml
@@ -101,6 +101,10 @@
org.apache.zookeeperzookeeper
+
+ org.projectlombok
+ lombok
+
@@ -123,11 +127,6 @@
hadoop-azure-datalakecompile
-
- org.apache.hadoop
- hadoop-openstack
- compile
- org.apache.hadoophadoop-cos
diff --git a/hadoop-cloud-storage-project/hadoop-cos/pom.xml b/hadoop-cloud-storage-project/hadoop-cos/pom.xml
index 0e801c4e439ac..ca7c4bf516cad 100644
--- a/hadoop-cloud-storage-project/hadoop-cos/pom.xml
+++ b/hadoop-cloud-storage-project/hadoop-cos/pom.xml
@@ -96,38 +96,6 @@
-
- org.apache.maven.plugins
- maven-enforcer-plugin
-
-
- de.skuzzle.enforcer
- restrict-imports-enforcer-rule
- ${restrict-imports.enforcer.version}
-
-
-
-
- banned-illegal-imports
- process-sources
-
- enforce
-
-
-
-
- true
- Use hadoop-common provided VisibleForTesting rather than the one provided by Guava
-
- org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting
- com.google.common.annotations.VisibleForTesting
-
-
-
-
-
-
-
@@ -141,7 +109,7 @@
com.qcloudcos_api-bundle
- 5.6.19
+ 5.6.69compile
diff --git a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/COSCredentialsProviderList.java b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/COSCredentialsProviderList.java
index d2d2f8c9a7cab..66ef4b1c6fd87 100644
--- a/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/COSCredentialsProviderList.java
+++ b/hadoop-cloud-storage-project/hadoop-cos/src/main/java/org/apache/hadoop/fs/cosn/auth/COSCredentialsProviderList.java
@@ -24,7 +24,7 @@
import java.util.concurrent.atomic.AtomicBoolean;
import java.util.concurrent.atomic.AtomicInteger;
-import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
+import org.apache.hadoop.util.Preconditions;
import com.qcloud.cos.auth.AnonymousCOSCredentials;
import com.qcloud.cos.auth.COSCredentials;
import com.qcloud.cos.auth.COSCredentialsProvider;
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml b/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml
index 7a9ee61836e1c..b96883b9ac80d 100755
--- a/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/pom.xml
@@ -92,38 +92,6 @@
-
- org.apache.maven.plugins
- maven-enforcer-plugin
-
-
- de.skuzzle.enforcer
- restrict-imports-enforcer-rule
- ${restrict-imports.enforcer.version}
-
-
-
-
- banned-illegal-imports
- process-sources
-
- enforce
-
-
-
-
- true
- Use hadoop-common provided VisibleForTesting rather than the one provided by Guava
-
- org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting
- com.google.common.annotations.VisibleForTesting
-
-
-
-
-
-
-
@@ -132,10 +100,14 @@
hadoop-commonprovided
-
- jdk.tools
- jdk.tools
-
+
+ jdk.tools
+ jdk.tools
+
+
+ org.javassist
+ javassist
+
@@ -193,6 +165,14 @@
okiocom.squareup.okio
+
+ log4j-core
+ org.apache.logging.log4j
+
+
+ log4j-api
+ org.apache.logging.log4j
+
@@ -208,4 +188,4 @@
test
-
\ No newline at end of file
+
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSBlockOutputStream.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSBlockOutputStream.java
index cefa897927790..22c6cb5c350c9 100644
--- a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSBlockOutputStream.java
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSBlockOutputStream.java
@@ -19,7 +19,7 @@
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.classification.VisibleForTesting;
-import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
+import org.apache.hadoop.util.Preconditions;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.Futures;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListenableFuture;
import org.apache.hadoop.thirdparty.com.google.common.util.concurrent.ListeningExecutorService;
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSCommonUtils.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSCommonUtils.java
index d477cec186b0e..3a06961d3acd9 100644
--- a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSCommonUtils.java
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSCommonUtils.java
@@ -18,7 +18,7 @@
package org.apache.hadoop.fs.obs;
-import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
+import org.apache.hadoop.util.Preconditions;
import com.obs.services.ObsClient;
import com.obs.services.exception.ObsException;
import com.obs.services.model.AbortMultipartUploadRequest;
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSDataBlocks.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSDataBlocks.java
index b58eaa00aa697..e347970ee8446 100644
--- a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSDataBlocks.java
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSDataBlocks.java
@@ -19,7 +19,7 @@
package org.apache.hadoop.fs.obs;
import org.apache.hadoop.classification.VisibleForTesting;
-import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
+import org.apache.hadoop.util.Preconditions;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSExceptionMessages;
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSIOException.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSIOException.java
index 29a92c71919a8..3f99fd610efa5 100644
--- a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSIOException.java
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSIOException.java
@@ -18,7 +18,7 @@
package org.apache.hadoop.fs.obs;
-import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
+import org.apache.hadoop.util.Preconditions;
import com.obs.services.exception.ObsException;
import java.io.IOException;
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSInputStream.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSInputStream.java
index e94565a4d760a..3f7e9888889b5 100644
--- a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSInputStream.java
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSInputStream.java
@@ -18,7 +18,7 @@
package org.apache.hadoop.fs.obs;
-import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
+import org.apache.hadoop.util.Preconditions;
import com.obs.services.ObsClient;
import com.obs.services.exception.ObsException;
import com.obs.services.model.GetObjectRequest;
diff --git a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSWriteOperationHelper.java b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSWriteOperationHelper.java
index 5cc3008f1dcfb..2b02f962a0598 100644
--- a/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSWriteOperationHelper.java
+++ b/hadoop-cloud-storage-project/hadoop-huaweicloud/src/main/java/org/apache/hadoop/fs/obs/OBSWriteOperationHelper.java
@@ -18,7 +18,7 @@
package org.apache.hadoop.fs.obs;
-import org.apache.hadoop.thirdparty.com.google.common.base.Preconditions;
+import org.apache.hadoop.util.Preconditions;
import com.obs.services.ObsClient;
import com.obs.services.exception.ObsException;
import com.obs.services.model.AbortMultipartUploadRequest;
diff --git a/hadoop-common-project/hadoop-auth-examples/src/main/java/org/apache/hadoop/security/authentication/examples/RequestLoggerFilter.java b/hadoop-common-project/hadoop-auth-examples/src/main/java/org/apache/hadoop/security/authentication/examples/RequestLoggerFilter.java
index 80489917e0857..e96e5062919a7 100644
--- a/hadoop-common-project/hadoop-auth-examples/src/main/java/org/apache/hadoop/security/authentication/examples/RequestLoggerFilter.java
+++ b/hadoop-common-project/hadoop-auth-examples/src/main/java/org/apache/hadoop/security/authentication/examples/RequestLoggerFilter.java
@@ -116,6 +116,7 @@ private List getHeaderValues(String name, boolean reset) {
public void addCookie(Cookie cookie) {
super.addCookie(cookie);
List cookies = getHeaderValues("Set-Cookie", false);
+ cookies.addAll(getHeaderValues("set-cookie", false));
cookies.add(cookie.getName() + "=" + cookie.getValue());
}
diff --git a/hadoop-common-project/hadoop-auth/pom.xml b/hadoop-common-project/hadoop-auth/pom.xml
index d21d6273fffd4..6eaa4fdfce5b4 100644
--- a/hadoop-common-project/hadoop-auth/pom.xml
+++ b/hadoop-common-project/hadoop-auth/pom.xml
@@ -248,40 +248,6 @@
${basedir}/dev-support/findbugsExcludeFile.xml
-
- org.apache.maven.plugins
- maven-enforcer-plugin
-
-
- de.skuzzle.enforcer
- restrict-imports-enforcer-rule
- ${restrict-imports.enforcer.version}
-
-
-
-
- banned-illegal-imports
- process-sources
-
- enforce
-
-
-
-
- true
- Use hadoop-common provided implementations rather than the one provided by Guava
-
- org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting
- com.google.common.annotations.VisibleForTesting
- org.apache.hadoop.thirdparty.com.google.common.base.Preconditions
- com.google.common.base.Preconditions
-
-
-
-
-
-
-
diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/AuthenticatedURL.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/AuthenticatedURL.java
index 32f4edfbc5710..cb7d36368aa39 100644
--- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/AuthenticatedURL.java
+++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/AuthenticatedURL.java
@@ -92,6 +92,9 @@ public synchronized Map> get(URI uri,
@Override
public void put(URI uri, Map> responseHeaders) {
List headers = responseHeaders.get("Set-Cookie");
+ if (headers == null) {
+ headers = responseHeaders.get("set-cookie");
+ }
if (headers != null) {
for (String header : headers) {
List cookies;
diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java
index 06b63c1b9916c..30e65efe10cba 100644
--- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java
+++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/client/KerberosAuthenticator.java
@@ -280,6 +280,9 @@ private boolean isNegotiate(HttpURLConnection conn) throws IOException {
boolean negotiate = false;
if (conn.getResponseCode() == HttpURLConnection.HTTP_UNAUTHORIZED) {
String authHeader = conn.getHeaderField(WWW_AUTHENTICATE);
+ if (authHeader == null) {
+ authHeader = conn.getHeaderField(WWW_AUTHENTICATE.toLowerCase());
+ }
negotiate = authHeader != null && authHeader.trim().startsWith(NEGOTIATE);
}
return negotiate;
@@ -388,6 +391,9 @@ private byte[] readToken(HttpURLConnection conn)
int status = conn.getResponseCode();
if (status == HttpURLConnection.HTTP_OK || status == HttpURLConnection.HTTP_UNAUTHORIZED) {
String authHeader = conn.getHeaderField(WWW_AUTHENTICATE);
+ if (authHeader == null) {
+ authHeader = conn.getHeaderField(WWW_AUTHENTICATE.toLowerCase());
+ }
if (authHeader == null || !authHeader.trim().startsWith(NEGOTIATE)) {
throw new AuthenticationException("Invalid SPNEGO sequence, '" + WWW_AUTHENTICATE +
"' header incorrect: " + authHeader);
diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AuthenticationFilter.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AuthenticationFilter.java
index 3658bd8b8ec01..7cc70c493c0f6 100644
--- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AuthenticationFilter.java
+++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/server/AuthenticationFilter.java
@@ -616,7 +616,9 @@ && getMaxInactiveInterval() > 0) {
// present.. reset to 403 if not found..
if ((errCode == HttpServletResponse.SC_UNAUTHORIZED)
&& (!httpResponse.containsHeader(
- KerberosAuthenticator.WWW_AUTHENTICATE))) {
+ KerberosAuthenticator.WWW_AUTHENTICATE)
+ && !httpResponse.containsHeader(
+ KerberosAuthenticator.WWW_AUTHENTICATE.toLowerCase()))) {
errCode = HttpServletResponse.SC_FORBIDDEN;
}
// After Jetty 9.4.21, sendError() no longer allows a custom message.
diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/CertificateUtil.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/CertificateUtil.java
index cf17aca15ceac..f25602c67d4a3 100644
--- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/CertificateUtil.java
+++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/CertificateUtil.java
@@ -18,7 +18,6 @@
package org.apache.hadoop.security.authentication.util;
import java.io.ByteArrayInputStream;
-import java.io.UnsupportedEncodingException;
import java.nio.charset.StandardCharsets;
import java.security.PublicKey;
import java.security.cert.CertificateException;
diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/JaasConfiguration.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/JaasConfiguration.java
new file mode 100644
index 0000000000000..d03e630cedf7e
--- /dev/null
+++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/JaasConfiguration.java
@@ -0,0 +1,77 @@
+/**
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License. See accompanying LICENSE file.
+ */
+package org.apache.hadoop.security.authentication.util;
+
+import java.util.HashMap;
+import java.util.Map;
+import javax.security.auth.login.AppConfigurationEntry;
+import javax.security.auth.login.Configuration;
+
+
+/**
+ * Creates a programmatic version of a jaas.conf file. This can be used
+ * instead of writing a jaas.conf file and setting the system property,
+ * "java.security.auth.login.config", to point to that file. It is meant to be
+ * used for connecting to ZooKeeper.
+ */
+public class JaasConfiguration extends Configuration {
+
+ private final javax.security.auth.login.Configuration baseConfig =
+ javax.security.auth.login.Configuration.getConfiguration();
+ private final AppConfigurationEntry[] entry;
+ private final String entryName;
+
+ /**
+ * Add an entry to the jaas configuration with the passed in name,
+ * principal, and keytab. The other necessary options will be set for you.
+ *
+ * @param entryName The name of the entry (e.g. "Client")
+ * @param principal The principal of the user
+ * @param keytab The location of the keytab
+ */
+ public JaasConfiguration(String entryName, String principal, String keytab) {
+ this.entryName = entryName;
+ Map options = new HashMap<>();
+ options.put("keyTab", keytab);
+ options.put("principal", principal);
+ options.put("useKeyTab", "true");
+ options.put("storeKey", "true");
+ options.put("useTicketCache", "false");
+ options.put("refreshKrb5Config", "true");
+ String jaasEnvVar = System.getenv("HADOOP_JAAS_DEBUG");
+ if ("true".equalsIgnoreCase(jaasEnvVar)) {
+ options.put("debug", "true");
+ }
+ entry = new AppConfigurationEntry[]{
+ new AppConfigurationEntry(getKrb5LoginModuleName(),
+ AppConfigurationEntry.LoginModuleControlFlag.REQUIRED,
+ options)};
+ }
+
+ @Override
+ public AppConfigurationEntry[] getAppConfigurationEntry(String name) {
+ return (entryName.equals(name)) ? entry : ((baseConfig != null)
+ ? baseConfig.getAppConfigurationEntry(name) : null);
+ }
+
+ private String getKrb5LoginModuleName() {
+ String krb5LoginModuleName;
+ if (System.getProperty("java.vendor").contains("IBM")) {
+ krb5LoginModuleName = "com.ibm.security.auth.module.Krb5LoginModule";
+ } else {
+ krb5LoginModuleName = "com.sun.security.auth.module.Krb5LoginModule";
+ }
+ return krb5LoginModuleName;
+ }
+}
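
Illustrative sketch (not part of the patch): how the new JaasConfiguration might be installed process-wide so a ZooKeeper client can log in from a keytab without an external jaas.conf. The entry name "Client" is the conventional ZooKeeper client entry; the principal and keytab path are placeholders.

    import javax.security.auth.login.Configuration;
    import org.apache.hadoop.security.authentication.util.JaasConfiguration;

    public class ZkJaasSetup {
      public static void main(String[] args) {
        // Register the programmatic JAAS entry for the whole JVM; values are placeholders.
        Configuration.setConfiguration(
            new JaasConfiguration("Client",
                "zkclient/host.example.com@EXAMPLE.COM",
                "/etc/security/keytabs/zk.client.keytab"));
      }
    }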
diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosUtil.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosUtil.java
index fc6f957b9622e..5125be078d67b 100644
--- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosUtil.java
+++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/KerberosUtil.java
@@ -236,7 +236,7 @@ public static final String getServicePrincipal(String service,
*/
static final String[] getPrincipalNames(String keytabFileName) throws IOException {
Keytab keytab = Keytab.loadKeytab(new File(keytabFileName));
- Set principals = new HashSet();
+ Set principals = new HashSet<>();
List entries = keytab.getPrincipals();
for (PrincipalName entry : entries) {
principals.add(entry.getName().replace("\\", "/"));
diff --git a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java
index 374f4a5665796..53b6f4d239f98 100644
--- a/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java
+++ b/hadoop-common-project/hadoop-auth/src/main/java/org/apache/hadoop/security/authentication/util/ZKSignerSecretProvider.java
@@ -17,12 +17,9 @@
import java.nio.ByteBuffer;
import java.security.SecureRandom;
import java.util.Collections;
-import java.util.HashMap;
import java.util.List;
-import java.util.Map;
import java.util.Properties;
import java.util.Random;
-import javax.security.auth.login.AppConfigurationEntry;
import javax.security.auth.login.Configuration;
import javax.servlet.ServletContext;
import org.apache.curator.RetryPolicy;
@@ -429,62 +426,4 @@ public List getAclForPath(String path) {
return saslACL;
}
}
-
- /**
- * Creates a programmatic version of a jaas.conf file. This can be used
- * instead of writing a jaas.conf file and setting the system property,
- * "java.security.auth.login.config", to point to that file. It is meant to be
- * used for connecting to ZooKeeper.
- */
- @InterfaceAudience.Private
- public static class JaasConfiguration extends Configuration {
-
- private final javax.security.auth.login.Configuration baseConfig =
- javax.security.auth.login.Configuration.getConfiguration();
- private static AppConfigurationEntry[] entry;
- private String entryName;
-
- /**
- * Add an entry to the jaas configuration with the passed in name,
- * principal, and keytab. The other necessary options will be set for you.
- *
- * @param entryName The name of the entry (e.g. "Client")
- * @param principal The principal of the user
- * @param keytab The location of the keytab
- */
- public JaasConfiguration(String entryName, String principal, String keytab) {
- this.entryName = entryName;
- Map options = new HashMap();
- options.put("keyTab", keytab);
- options.put("principal", principal);
- options.put("useKeyTab", "true");
- options.put("storeKey", "true");
- options.put("useTicketCache", "false");
- options.put("refreshKrb5Config", "true");
- String jaasEnvVar = System.getenv("HADOOP_JAAS_DEBUG");
- if (jaasEnvVar != null && "true".equalsIgnoreCase(jaasEnvVar)) {
- options.put("debug", "true");
- }
- entry = new AppConfigurationEntry[]{
- new AppConfigurationEntry(getKrb5LoginModuleName(),
- AppConfigurationEntry.LoginModuleControlFlag.REQUIRED,
- options)};
- }
-
- @Override
- public AppConfigurationEntry[] getAppConfigurationEntry(String name) {
- return (entryName.equals(name)) ? entry : ((baseConfig != null)
- ? baseConfig.getAppConfigurationEntry(name) : null);
- }
-
- private String getKrb5LoginModuleName() {
- String krb5LoginModuleName;
- if (System.getProperty("java.vendor").contains("IBM")) {
- krb5LoginModuleName = "com.ibm.security.auth.module.Krb5LoginModule";
- } else {
- krb5LoginModuleName = "com.sun.security.auth.module.Krb5LoginModule";
- }
- return krb5LoginModuleName;
- }
- }
}
diff --git a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/KerberosTestUtils.java b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/KerberosTestUtils.java
index 8fc08e2171f67..293871bcd0620 100644
--- a/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/KerberosTestUtils.java
+++ b/hadoop-common-project/hadoop-auth/src/test/java/org/apache/hadoop/security/authentication/KerberosTestUtils.java
@@ -108,9 +108,9 @@ public AppConfigurationEntry[] getAppConfigurationEntry(String name) {
public static T doAs(String principal, final Callable callable) throws Exception {
LoginContext loginContext = null;
try {
- Set principals = new HashSet();
+ Set principals = new HashSet<>();
principals.add(new KerberosPrincipal(KerberosTestUtils.getClientPrincipal()));
- Subject subject = new Subject(false, principals, new HashSet
+
+ @return The StorageStatistics for this FileSystem instance.
+ Will never be null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ All user code that may potentially use the Hadoop Distributed
+ File System should be written to use a FileSystem object or its
+ successor, {@link FileContext}.
+
+
+ The local implementation is {@link LocalFileSystem} and the distributed
+ implementation is DistributedFileSystem. There are other implementations
+ for object stores and, outside the Apache Hadoop codebase,
+ third-party filesystems.
+
+ Notes
+
+
The behaviour of the filesystem is
+
+ specified in the Hadoop documentation.
+ However, the normative specification of the behavior of this class is
+ actually HDFS: if HDFS does not behave the way these Javadocs or
+ the specification in the Hadoop documentation defines, assume that
+ the documentation is incorrect.
+
+
The term {@code FileSystem} refers to an instance of this class.
+
The acronym "FS" is used as an abbreviation of FileSystem.
+
The term {@code filesystem} refers to the distributed/local filesystem
+ itself, rather than the class used to interact with it.
+
The term "file" refers to a file in the remote filesystem,
+ rather than instances of {@code java.io.File}.
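
Illustrative sketch (not part of the patch) of the usage pattern described above: obtain the FileSystem bound to fs.defaultFS and list a directory.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileStatus;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class ListDir {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.get(conf);   // the default filesystem (fs.defaultFS)
        for (FileStatus st : fs.listStatus(new Path("/tmp"))) {
          System.out.println(st.getPath() + "\t" + st.getLen());
        }
      }
    }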
/tmp -> hdfs://nnTmp/privateTmpForUserXXX
+
+
+ ViewFs is specified with the following URI: viewfs:///
+
+ To use viewfs one would typically set the default file system in the
+ config (i.e. fs.defaultFS = viewfs:///) along with the
+ mount table config variables as described below.
+
+
+ ** Config variables to specify the mount table entries **
+
+
+ The file system is initialized from the standard Hadoop config through
+ config variables.
+ See {@link FsConstants} for URI and Scheme constants;
+ See {@link Constants} for config var constants;
+ see {@link ConfigUtil} for convenient lib.
+
+
+ All the mount table config entries for view fs are prefixed by
+ fs.viewfs.mounttable.
+ For example the above example can be specified with the following
+ config variables:
+
+
+ The default mount table (when no authority is specified) is
+ from config variables prefixed by fs.viewFs.mounttable.default
+ The authority component of a URI can be used to specify a different mount
+ table. For example,
+
+
viewfs://sanjayMountable/
+
+ is initialized from fs.viewFs.mounttable.sanjayMountable.* config variables.
+
+
+ **** Merge Mounts **** (NOTE: merge mounts are not implemented yet.)
+
+
+ One can also use "MergeMounts" to merge several directories (this is
+ sometimes called union-mounts or junction-mounts in the literature).
+ For example, if the home directories are stored on, say, two file systems
+ (because they do not fit on one), then one could specify a mount
+ entry such as the following, which merges the two dirs:
+
+ In this case the root of the mount table is merged with the root of
+ hdfs://nn99/ ]]>
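
Illustrative sketch (assumed usage, not part of the patch): wiring up a mount table programmatically with ConfigUtil instead of raw fs.viewfs.mounttable.* variables; the target URIs are placeholders.

    import java.net.URI;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.FsConstants;
    import org.apache.hadoop.fs.viewfs.ConfigUtil;

    public class ViewFsSetup {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Two links in the default mount table; the target namenodes are placeholders.
        ConfigUtil.addLink(conf, "/user", new URI("hdfs://nn1/user"));
        ConfigUtil.addLink(conf, "/tmp", new URI("hdfs://nnTmp/privateTmpForUserXXX"));
        FileSystem viewFs = FileSystem.get(FsConstants.VIEWFS_URI, conf);
        System.out.println(viewFs.getUri());
      }
    }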
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Since these methods are often vendor- or device-specific, operators
+ may implement this interface in order to achieve fencing.
+
+ Fencing is configured by the operator as an ordered list of methods to
+ attempt. Each method will be tried in turn, and the next in the list
+ will only be attempted if the previous one fails. See {@link NodeFencer}
+ for more information.
+
+ If an implementation also implements {@link Configurable} then its
+ setConf method will be called upon instantiation.]]>
+
StaticUserWebFilter - An authorization plugin that makes all
+users a static configured user.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ public class IntArrayWritable extends ArrayWritable {
+ public IntArrayWritable() {
+ super(IntWritable.class);
+ }
+ }
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ByteWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to store
+ @param item the object to be stored
+ @param keyName the name of the key to use
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param items the objects to be stored
+ @param keyName the name of the key to use
+ @throws IndexOutOfBoundsException if the items array is empty
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+ DefaultStringifier offers convenience methods to store/load objects to/from
+ the configuration.
+
+ @param the class of the objects to stringify]]>
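
Illustrative sketch (not part of the patch) of the store/load round trip these methods provide; the key name is arbitrary.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.DefaultStringifier;
    import org.apache.hadoop.io.Text;

    public class StringifierRoundTrip {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Serialize a Writable into the configuration under an arbitrary key...
        DefaultStringifier.store(conf, new Text("hello"), "example.key");
        // ...and restore it elsewhere from the same configuration.
        Text restored = DefaultStringifier.load(conf, "example.key", Text.class);
        System.out.println(restored);
      }
    }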
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a DoubleWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value argument is null or
+ its size is zero, the elementType argument must not be null. If
+ the argument value's size is bigger than zero, the argument
+ elementType is not used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+ value should not be null
+ or empty.
+
+ @param value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value and elementType. If the value argument
+ is null or its size is zero, the elementType argument must not be
+ null. If the argument value's size is bigger than zero, the
+ argument elementType is not used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is an EnumSetWritable with the same value,
+ or both are null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a FloatWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When two sequence files, which have same Key type but different Value
+ types, are mapped out to reduce, multiple Value types are not allowed.
+ In this case, this class can help you wrap instances with different types.
+
+
+
+ Compared with ObjectWritable, this class is much more effective,
+ because ObjectWritable will append the class declaration as a String
+ into the output file in every Key-Value pair.
+
+
+
+ Generic Writable implements {@link Configurable} interface, so that it will be
+ configured by the framework. The configuration is passed to the wrapped objects
+ implementing {@link Configurable} interface before deserialization.
+
+
+ how to use it:
+ 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines
+ the classes that will be wrapped in GenericObject in the application.
+ Attention: the classes defined in the getTypes() method must
+ implement the Writable interface.
+
+
+ @since Nov 8, 2006]]>
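
Illustrative sketch (not part of the patch) of steps 1-2 above: a GenericWritable subclass whose getTypes() lists the Writable classes it may wrap.

    import org.apache.hadoop.io.GenericWritable;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.io.Writable;

    public class GenericObject extends GenericWritable {
      // The wrapped classes must all implement Writable.
      @SuppressWarnings("unchecked")
      private static final Class<? extends Writable>[] TYPES =
          new Class[] {IntWritable.class, Text.class};

      @Override
      protected Class<? extends Writable>[] getTypes() {
        return TYPES;
      }
    }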
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a IntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ closes the input and output streams
+ at the end.
+
+ @param in InputStream to read from
+ @param out OutputStream to write to
+ @param conf the Configuration object]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param log the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close
+ @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)}
+ instead]]>
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param logger the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is better than File#listDir because it does not ignore IOExceptions.
+
+ @param dir The directory to list.
+ @param filter If non-null, the filter to use when listing
+ this directory.
+ @return The list of files in the directory.
+
+ @throws IOException On I/O error]]>
+
+
+
+
+
+
+
+ Borrowed from Uwe Schindler in LUCENE-5588
+ @param fileToSync the file to fsync]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a LongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A map is a directory containing two files, the data file,
+ containing all keys and values in the map, and a smaller index
+ file, containing a fraction of the keys. The fraction is determined by
+ {@link Writer#getIndexInterval()}.
+
+
The index file is read entirely into memory. Thus key implementations
+ should try to keep themselves small.
+
+
Map files are created by adding entries in-order. To maintain a large
+ database, perform updates by copying the previous version of a database and
+ merging in a sorted change list, to create a new version of the database in
+ a new file. Sorting large change lists can be done with {@link
+ SequenceFile.Sorter}.]]>
+
SequenceFile provides {@link SequenceFile.Writer},
+ {@link SequenceFile.Reader} and {@link Sorter} classes for writing,
+ reading and sorting respectively.
+
+ There are three SequenceFileWriters based on the
+ {@link CompressionType} used to compress key/value pairs:
+
+
+ Writer : Uncompressed records.
+
+
+ RecordCompressWriter : Record-compressed files, only compress
+ values.
+
+
+ BlockCompressWriter : Block-compressed files, both keys &
+ values are collected in 'blocks'
+ separately and compressed. The size of
+ the 'block' is configurable.
+
+
+
The actual compression algorithm used to compress key and/or values can be
+ specified by using the appropriate {@link CompressionCodec}.
+
+
The recommended way is to use the static createWriter methods
+ provided by the SequenceFile to choose the preferred format.
+
+
The {@link SequenceFile.Reader} acts as the bridge and can read any of the
+ above SequenceFile formats.
+
+
SequenceFile Formats
+
+
Essentially there are 3 different formats for SequenceFiles
+ depending on the CompressionType specified. All of them share a
+ common header described below.
+
+
SequenceFile Header
+
+
+ version - 3 bytes of magic header SEQ, followed by 1 byte of actual
+ version number (e.g. SEQ4 or SEQ6)
+
+
+ keyClassName -key class
+
+
+ valueClassName - value class
+
+
+ compression - A boolean which specifies if compression is turned on for
+ keys/values in this file.
+
+
+ blockCompression - A boolean which specifies if block-compression is
+ turned on for keys/values in this file.
+
+
+ compression codec - CompressionCodec class which is used for
+ compression of keys and/or values (if compression is
+ enabled).
+
+
+ metadata - {@link Metadata} for this file.
+
+
+ sync - A sync marker to denote end of the header.
+
The compressed blocks of key lengths and value lengths consist of the
+ actual lengths of individual keys/values encoded in ZeroCompressedInteger
+ format.
+
+ @see CompressionCodec]]>
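
Illustrative sketch (assumed usage, not part of the patch) of the recommended createWriter path, producing a record-compressed file; the output path is a placeholder.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.io.IntWritable;
    import org.apache.hadoop.io.SequenceFile;
    import org.apache.hadoop.io.Text;

    public class SeqFileWrite {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
            SequenceFile.Writer.file(new Path("/tmp/example.seq")),
            SequenceFile.Writer.keyClass(Text.class),
            SequenceFile.Writer.valueClass(IntWritable.class),
            SequenceFile.Writer.compression(SequenceFile.CompressionType.RECORD))) {
          writer.append(new Text("key-1"), new IntWritable(1));
        }
      }
    }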
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ShortWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the objects to stringify]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ position. Note that this
+ method avoids using the converter or doing String instantiation
+ @return the Unicode scalar value at position or -1
+ if the position is invalid or points to a
+ trailing byte]]>
+
+
+
+
+
+
+
+
+
+ what in the backing
+ buffer, starting at position start. The starting
+ position is measured in bytes and the return value is in
+ terms of byte position in the buffer. The backing buffer is
+ not converted to a string for this operation.
+ @return byte position of the first occurrence of the search
+ string in the UTF-8 buffer or -1 if not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Note: For performance reasons, this call does not clear the
+ underlying byte array that is retrievable via {@link #getBytes()}.
+ In order to free the byte-array memory, call {@link #set(byte[])}
+ with an empty byte array (For example, new byte[0]).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a Text with the same contents.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.
+ @return ByteBuffer: bytes stores at ByteBuffer.array()
+ and length is ByteBuffer.limit()]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In
+ addition, it provides methods for string traversal without converting the
+ byte array to a string.
Also includes utilities for
+ serializing/deserializing a string, coding/decoding a string, checking if a
+ byte array contains valid UTF8 code, calculating the length of an encoded
+ string.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is useful when a class may evolve, so that instances written by the
+ old version of the class may still be processed by the new version. To
+ handle this situation, {@link #readFields(DataInput)}
+ implementations should catch {@link VersionMismatchException}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VIntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VLongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ out.
+
+ @param out DataOutput to serialize this object into.
+ @throws IOException]]>
+
+
+
+
+
+
+ in.
+
+
For efficiency, implementations should attempt to re-use storage in the
+ existing object where possible.
+
+ @param in DataInput to deserialize this object from.
+ @throws IOException]]>
+
+
+
+ Any key or value type in the Hadoop Map-Reduce
+ framework implements this interface.
+
+
Implementations typically implement a static read(DataInput)
+ method which constructs a new instance, calls {@link #readFields(DataInput)}
+ and returns the instance.
+
+
Example:
+
+ public class MyWritable implements Writable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public static MyWritable read(DataInput in) throws IOException {
+ MyWritable w = new MyWritable();
+ w.readFields(in);
+ return w;
+ }
+ }
+
]]>
+
+
+
+
+
+
+
+
+ WritableComparables can be compared to each other, typically
+ via Comparators. Any type which is to be used as a
+ key in the Hadoop Map-Reduce framework should implement this
+ interface.
+
+
Note that hashCode() is frequently used in Hadoop to partition
+ keys. It's important that your implementation of hashCode() returns the same
+ result across different instances of the JVM. Note also that the default
+ hashCode() implementation in Object does not
+ satisfy this property.
+
+
Example:
+
+ public class MyWritableComparable implements WritableComparable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public int compareTo(MyWritableComparable o) {
+ int thisValue = this.value;
+ int thatValue = o.value;
+ return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
+ }
+
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + counter;
+ result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
+ return result;
+ }
+ }
+
One may optimize compare-intensive operations by overriding
+ {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are
+ provided to assist in optimized implementations of this method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Enum type
+ @param in DataInput to read from
+ @param enumType Class type of Enum
+ @return Enum represented by String read from DataInput
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ len number of bytes in input stream in
+ @param in input stream
+ @param len number of bytes to skip
+ @throws IOException when fewer than len bytes are skipped]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CompressionCodec for which to get the
+ Compressor
+ @param conf the Configuration object which contains confs for creating or reinit the compressor
+ @return Compressor for the given
+ CompressionCodec from the pool or a new one]]>
+
+
+
+
+
+
+
+
+ CompressionCodec for which to get the
+ Decompressor
+ @return Decompressor for the given
+ CompressionCodec the pool or a new one]]>
+
+
+
+
+
+ Compressor to be returned to the pool]]>
+
+
+
+
+
+ Decompressor to be returned to the
+ pool]]>
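
Illustrative sketch (not part of the patch) of the borrow/return discipline the pool expects; GzipCodec stands in for any CompressionCodec.

    import java.io.OutputStream;
    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.io.compress.CodecPool;
    import org.apache.hadoop.io.compress.CompressionCodec;
    import org.apache.hadoop.io.compress.CompressionOutputStream;
    import org.apache.hadoop.io.compress.Compressor;
    import org.apache.hadoop.io.compress.GzipCodec;
    import org.apache.hadoop.util.ReflectionUtils;

    public class PooledCompressionExample {
      static void writeCompressed(OutputStream rawOut, byte[] data, Configuration conf)
          throws Exception {
        CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
        Compressor compressor = CodecPool.getCompressor(codec, conf);
        try {
          CompressionOutputStream out = codec.createOutputStream(rawOut, compressor);
          out.write(data);
          out.finish();
        } finally {
          CodecPool.returnCompressor(compressor);  // always return the borrowed compressor
        }
      }
    }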
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Codec aliases are case insensitive.
+
+ The codec alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec object]]>
+
+
+
+
+
+
+ Codec aliases are case insensitive.
+
+ The codec alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec class]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Implementations are assumed to be buffered. This permits clients to
+ reposition the underlying input stream then call {@link #resetState()},
+ without having to also synchronize client buffers.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ #setInput() should be called in order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if the end of the compressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+ (Both native and non-native versions of various Decompressors require
+ that the data passed in via b[] remain unmodified until
+ the caller is explicitly notified--via {@link #needsInput()}--that the
+ buffer may be safely modified. With this requirement, an extra
+ buffer-copy can be avoided.)
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called to
+ provide more input.
+
+ @return true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called in
+ order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ true if a preset dictionary is needed for decompression.
+ @return true if a preset dictionary is needed for decompression]]>
+
+
+
+
+ true if the end of the decompressed
+ data output stream has been reached. Indicates a concatenated data stream
+ when finished() returns true and {@link #getRemaining()}
+ returns a positive value. finished() will be reset with the
+ {@link #reset()} method.
+ @return true if the end of the decompressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true and getRemaining() returns a positive value. If
+ {@link #finished()} returns true and getRemaining() returns
+ a zero value, indicates that the end of data stream has been reached and
+ is not a concatenated data stream.
+ @return The number of bytes remaining in the compressed data buffer.]]>
+
+
+
+
+ true and {@link #getRemaining()} returns a positive value,
+ reset() is called before processing of the next data stream in the
+ concatenated data stream. {@link #finished()} will be reset and will
+ return false when reset() is called.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Seek by key or by file offset.
+
+ The memory footprint of a TFile includes the following:
+
+
Some constant overhead of reading or writing a compressed block.
+
+
Each compressed block requires one compression/decompression codec for
+ I/O.
+
Temporary space to buffer the key.
+
Temporary space to buffer the value (for TFile.Writer only). Values are
+ chunk encoded, so that we buffer at most one chunk of user data. By default,
+ the chunk buffer is 1MB. Reading chunked value does not require additional
+ memory.
+
+
TFile index, which is proportional to the total number of Data Blocks.
+ The total amount of memory needed to hold the index can be estimated as
+ (56+AvgKeySize)*NumBlocks.
+
MetaBlock index, which is proportional to the total number of Meta
+ Blocks. The total amount of memory needed to hold the index for Meta Blocks
+ can be estimated as (40+AvgMetaBlockName)*NumMetaBlock.
+
+
+ The behavior of TFile can be customized by the following variables through
+ Configuration:
+
+
tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default
+ to 1MB. Values of length less than the chunk size are guaranteed to have
+ a known value length at read time (See
+ {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}).
+
tfile.fs.output.buffer.size: Buffer size used for
+ FSDataOutputStream. Integer (in bytes). Default to 256KB.
+
tfile.fs.input.buffer.size: Buffer size used for
+ FSDataInputStream. Integer (in bytes). Default to 256KB.
+
+
+ Suggestions on performance optimization.
+
+
Minimum block size. We recommend a setting of minimum block size between
+ 256KB to 1MB for general usage. Larger block size is preferred if files are
+ primarily for sequential access. However, it would lead to inefficient random
+ access (because there is more data to decompress). Smaller blocks are good
+ for random access, but require more memory to hold the block index, and may
+ be slower to create (because we must flush the compressor stream at the
+ conclusion of each data block, which leads to an FS I/O flush). Further, due
+ to the internal caching in Compression codec, the smallest possible block
+ size would be around 20KB-30KB.
+
The current implementation does not offer true multi-threading for
+ reading. The implementation uses FSDataInputStream seek()+read(), which is
+ shown to be much faster than positioned-read call in single thread mode.
+ However, it also means that if multiple threads attempt to access the same
+ TFile (using multiple scanners) simultaneously, the actual I/O is carried out
+ sequentially even if they access different DFS blocks.
+
Compression codec. Use "none" if the data is not very compressible (by
+ compressible, I mean a compression ratio of at least 2:1). Generally, use "lzo"
+ as the starting point for experimenting. "gz" offers a slightly better
+ compression ratio than "lzo" but requires 4x CPU to compress and 2x CPU to
+ decompress, compared to "lzo".
+
File system buffering, if the underlying FSDataInputStream and
+ FSDataOutputStream are already adequately buffered; or if applications
+ read/write keys and values in large buffers, we can reduce the sizes of
+ input/output buffering in TFile layer by setting the configuration parameters
+ "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size".
+
+
+ Some design rationale behind TFile can be found at Hadoop-3315.]]>
+
+
+
+
+
+
+
+
+
+
+ Utils#writeVLong(out, n).
+
+ @param out
+ output stream
+ @param n
+ The integer to be encoded
+ @throws IOException
+ @see Utils#writeVLong(DataOutput, long)]]>
+
+
+
+
+
+
+
+
+
if n in [-32, 127): encode in one byte with the actual value.
+ Otherwise,
+
if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52;
+ byte[1]=n&0xff. Otherwise,
+
if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 -
+ 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
+
if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112;
+ byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise:
+
if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] =
+ (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
+
if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] =
+ (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff;
+ byte[4]=(n>>8)&0xff; byte[5]=n&0xff
+
if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] =
+ (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff;
+ byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
+
if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] =
+ (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff;
+ byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff;
+ byte[7]=n&0xff;
+
if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] =
+ (n>>54)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff;
+ byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff;
+ byte[7]=(n>>8)&0xff; byte[8]=n&0xff;
+
+
+ @param out
+ output stream
+ @param n
+ the integer number
+ @throws IOException]]>
+
if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
+
if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 +
+ NB[1]&0xff;
+
if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 +
+ (NB[1]&0xff)<<8 + NB[2]&0xff;
+
if (FB in [-128, -121]), return the value obtained by interpreting the
+ next (FB+129) bytes, NB[0..FB+128], as a signed big-endian integer.
+
+ @param in
+ input stream
+ @return the decoded long integer.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ An experimental {@link Serialization} for Java {@link Serializable} classes.
+
+ @see JavaSerializationComparator]]>
+
+
+
+
+
+
+
+
+
+
+ A {@link RawComparator} that uses a {@link JavaSerialization}
+ {@link Deserializer} to deserialize objects that are then compared via
+ their {@link Comparable} interfaces.
+
+ @param
+ @see JavaSerialization]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides a mechanism for using different serialization frameworks
+in Hadoop. The property "io.serializations" defines a list of
+{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create
+{@link org.apache.hadoop.io.serializer.Serializer}s and
+{@link org.apache.hadoop.io.serializer.Deserializer}s.
+
+
+
+To add a new serialization framework write an implementation of
+{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the
+"io.serializations" property.
+
]]>
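+
+ As a sketch of registering a framework (the class names shown are the
+ WritableSerialization and JavaSerialization implementations; treat the exact
+ list as illustrative, since JavaSerialization is experimental):
+
+   Configuration conf = new Configuration();
+   conf.setStrings("io.serializations",
+       "org.apache.hadoop.io.serializer.WritableSerialization",
+       "org.apache.hadoop.io.serializer.JavaSerialization");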
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ avro.reflect.pkgs or implement
+ {@link AvroReflectSerializable} interface.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides Avro serialization in Hadoop. This can be used to
+serialize/deserialize Avro types in Hadoop.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for
+serialization of classes generated by Avro's 'specific' compiler.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for
+other classes.
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} work for
+any class which is either in the package list configured via
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES}
+or implement {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable}
+interface.
+
]]>
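+
+ A hedged configuration sketch (the package name com.example.model is
+ hypothetical) showing how classes can be made eligible for
+ AvroReflectSerialization via the avro.reflect.pkgs property:
+
+   Configuration conf = new Configuration();
+   // packages whose classes should be handled by reflect-based Avro serialization
+   conf.set("avro.reflect.pkgs", "com.example.model");
+   conf.setStrings("io.serializations",
+       "org.apache.hadoop.io.serializer.avro.AvroReflectSerialization");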
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+The API is abstract so that it can be implemented on top of
+a variety of metrics client libraries. The choice of
+client library is a configuration option, and different
+modules within the same application can use
+different metrics implementation libraries.
+
+Sub-packages:
+
+
org.apache.hadoop.metrics.spi
+
The abstract Server Provider Interface package. Those wishing to
+ integrate the metrics API with a particular metrics client library should
+ extend this package.
+
+
org.apache.hadoop.metrics.file
+
An implementation package which writes the metric data to
+ a file, or sends it to the standard output stream.
+
+
org.apache.hadoop.metrics.ganglia
+
An implementation package which sends metric data to
+ Ganglia.
+
+
+
Introduction to the Metrics API
+
+Here is a simple example of how to use this package to report a single
+metric value:
+
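+ A minimal sketch of such a report, assuming the deprecated
+ org.apache.hadoop.metrics API (the names myContextName, myRecord and myMetric
+ are placeholders, and 42.0f is an arbitrary value):
+
+   MetricsContext context = MetricsUtil.getContext("myContextName");
+   MetricsRecord record = MetricsUtil.createRecord(context, "myRecord");
+   record.setMetric("myMetric", 42.0f);
+   record.update();
+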
The context name will typically identify either the application, or else a
+ module within an application or library.
+
+
myRecord
+
The record name generally identifies some entity for which a set of
+ metrics are to be reported. For example, you could have a record named
+ "cacheStats" for reporting a number of statistics relating to the usage of
+ some cache in your application.
+
+
myMetric
+
This identifies a particular metric. For example, you might have metrics
+ named "cache_hits" and "cache_misses".
+
+
+
+
Tags
+
+In some cases it is useful to have multiple records with the same name. For
+example, suppose that you want to report statistics about each disk on a computer.
+In this case, the record name would be something like "diskStats", but you also
+need to identify the disk which is done by adding a tag to the record.
+The code could look something like this:
+
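+ A minimal sketch of such a tagged record (diskName, diskBusy and diskUsed are
+ placeholder variables):
+
+   MetricsRecord diskStats = MetricsUtil.createRecord(context, "diskStats");
+   diskStats.setTag("diskName", diskName);
+   diskStats.setMetric("diskBusy", diskBusy);
+   diskStats.setMetric("diskUsed", diskUsed);
+   diskStats.update();
+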
+
+Data is not sent immediately to the metrics system when
+MetricsRecord.update() is called. Instead it is stored in an
+internal table, and the contents of the table are sent periodically.
+This can be important for two reasons:
+
+
It means that a programmer is free to put calls to this API in an
+ inner loop, since updates can be very frequent without slowing down
+ the application significantly.
+
Some implementations can gain efficiency by combining many metrics
+ into a single UDP message.
+
+
+The API provides a timer-based callback via the
+registerUpdater() method. The benefit of this
+versus using java.util.Timer is that the callbacks will be done
+immediately before sending the data, making the data as current as possible.
+
+
Configuration
+
+It is possible to programmatically examine and modify configuration data
+before creating a context, like this:
+
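+ A minimal sketch (the attribute value shown is the FileContext default
+ mentioned below and is only an example):
+
+   ContextFactory factory = ContextFactory.getFactory();
+   // examine and/or modify factory attributes before creating the context
+   factory.setAttribute("myContextName.class",
+       "org.apache.hadoop.metrics.file.FileContext");
+   MetricsContext context = MetricsUtil.getContext("myContextName");
+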
+The factory attributes can be examined and modified using the following
+ContextFactory methods:
+
+
Object getAttribute(String attributeName)
+
String[] getAttributeNames()
+
void setAttribute(String name, Object value)
+
void removeAttribute(attributeName)
+
+
+
+ContextFactory.getFactory() initializes the factory attributes by
+reading the properties file hadoop-metrics.properties if it exists
+on the class path.
+
+
+A factory attribute named:
+
+contextName.class
+
+should have as its value the fully qualified name of the class to be
+instantiated by a call of the ContextFactory method
+getContext(contextName). If this factory attribute is not
+specified, the default is to instantiate
+org.apache.hadoop.metrics.file.FileContext.
+
+
+Other factory attributes are specific to a particular implementation of this
+API and are documented elsewhere. For example, configuration attributes for
+the file and Ganglia implementations can be found in the javadoc for
+their respective packages.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+Implementation of the metrics package that sends metric data to
+Ganglia.
+Programmers should not normally need to use this package directly. Instead
+they should use org.apache.hadoop.metrics.
+
+
+These are the implementation specific factory attributes
+(See ContextFactory.getFactory()):
+
+
+
contextName.servers
+
Space and/or comma separated sequence of servers to which UDP
+ messages should be sent.
+
+
contextName.period
+
The period in seconds on which the metric data is sent to the
+ server(s).
+
+
contextName.multicast
+
Enable multicast for Ganglia
+
+
contextName.multicast.ttl
+
TTL for multicast packets
+
+
contextName.units.recordName.metricName
+
The units for the specified metric in the specified record.
+
+
contextName.slope.recordName.metricName
+
The slope for the specified metric in the specified record.
+
+
contextName.tmax.recordName.metricName
+
The tmax for the specified metric in the specified record.
+
+
contextName.dmax.recordName.metricName
+
The dmax for the specified metric in the specified record.
+
+
]]>
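+
+ A hedged hadoop-metrics.properties fragment along these lines (the context
+ name "dfs" and the server host/port are illustrative only):
+
+   dfs.class=org.apache.hadoop.metrics.ganglia.GangliaContext
+   dfs.period=10
+   dfs.servers=ganglia-host.example.com:8649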
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ contextName.tableName. The returned map consists of
+ those attributes with the contextName and tableName stripped off.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ recordName.
+ Throws an exception if the metrics implementation is configured with a fixed
+ set of record names and recordName is not in that set.
+
+ @param recordName the name of the record
+ @throws MetricsException if recordName conflicts with configuration data]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This class implements the internal table of metric data, and the timer
+ on which data is to be sent to the metrics system. Subclasses must
+ override the abstract emitRecord method in order to transmit
+ the data.
+
+ @deprecated Use org.apache.hadoop.metrics2 package instead.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ update
+ and remove().
+
+ @deprecated Use {@link org.apache.hadoop.metrics2.impl.MetricsRecordImpl}
+ instead.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostname or hostname:port. If
+ the specs string is null, defaults to localhost:defaultPort.
+
+ @return a list of InetSocketAddress objects.]]>
+
+
+
+
+
+
+
+
+ org.apache.hadoop.metrics.file and
+org.apache.hadoop.metrics.ganglia.
+
+Plugging in an implementation involves writing a concrete subclass of
+AbstractMetricsContext. The subclass should get its
+ configuration information using the getAttribute(attributeName)
+ method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Implementations of this interface consume the {@link MetricsRecord} generated
+ from {@link MetricsSource}. It registers with {@link MetricsSystem} which
+ periodically pushes the {@link MetricsRecord} to the sink using
+ {@link #putMetrics(MetricsRecord)} method. If the implementing class also
+ implements {@link Closeable}, then the MetricsSystem will close the sink when
+ it is stopped.]]>
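+
+ A minimal sketch of a sink (the class name LoggingSink is hypothetical; in
+ older Hadoop lines SubsetConfiguration comes from commons-configuration
+ rather than commons-configuration2):
+
+   import org.apache.commons.configuration2.SubsetConfiguration;
+   import org.apache.hadoop.metrics2.AbstractMetric;
+   import org.apache.hadoop.metrics2.MetricsRecord;
+   import org.apache.hadoop.metrics2.MetricsSink;
+
+   public class LoggingSink implements MetricsSink {
+     @Override
+     public void init(SubsetConfiguration conf) {
+       // read sink-specific properties here if needed
+     }
+     @Override
+     public void putMetrics(MetricsRecord record) {
+       for (AbstractMetric metric : record.metrics()) {
+         System.out.println(record.name() + "." + metric.name() + "=" + metric.value());
+       }
+     }
+     @Override
+     public void flush() {
+       // nothing is buffered in this sketch
+     }
+   }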
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the actual type of the source object
+ @param source object to register
+ @return the source object
+ @exception MetricsException]]>
+
+
+
+
+
+
+
+ the actual type of the source object
+ @param source object to register
+ @param name of the source. Must be unique or null (then extracted from
+ the annotations of the source object.)
+ @param desc the description of the source (or null. See above.)
+ @return the source object
+ @exception MetricsException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (aggregate).
+ Filter out entries that don't have at least minSamples.
+
+ @return a map of peer DataNode Id to the average latency to that
+ node seen over the measurement period.]]>
+
+
+
+
+
+
+
+
+
+
+ This class maintains a group of rolling average metrics. It implements the
+ algorithm of rolling average, i.e. a number of sliding windows are kept to
+ roll over and evict old subsets of samples. Each window has a subset of
+ samples in a stream, where sub-sum and sub-total are collected. All sub-sums
+ and sub-totals in all windows will be aggregated to final-sum and final-total
+ used to compute final average, which is called rolling average.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This class is a metrics sink that uses
+ {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every
+ roll interval a new directory will be created under the path specified by the
+ basepath property. All metrics will be logged to a file in the
+ current interval's directory in a file named <hostname>.log, where
+ <hostname> is the name of the host on which the metrics logging
+ process is running. The base path is set by the
+ <prefix>.sink.<instance>.basepath property. The
+ time zone used to create the current interval's directory name is GMT. If
+ the basepath property isn't specified, it will default to
+ "/tmp", which is the temp directory on whatever default file
+ system is configured for the cluster.
+
+
The <prefix>.sink.<instance>.ignore-error
+ property controls whether an exception is thrown when an error is encountered
+ writing a log file. The default value is true. When set to
+ false, file errors are quietly swallowed.
+
+
The roll-interval property sets the amount of time before
+ rolling the directory. The default value is 1 hour. The roll interval may
+ not be less than 1 minute. The property's value should be given as
+ number unit, where number is an integer value, and
+ unit is a valid unit. Valid units are minute, hour,
+ and day. The units are case insensitive and may be abbreviated or
+ plural. If no units are specified, hours are assumed. For example,
+ "2", "2h", "2 hour", and
+ "2 hours" are all valid ways to specify two hours.
+
+
The roll-offset-interval-millis property sets the upper
+ bound on a random time interval (in milliseconds) that is used to delay
+ before the initial roll. All subsequent rolls will happen an integer
+ number of roll intervals after the initial roll, hence retaining the original
+ offset. The purpose of this property is to insert some variance in the roll
+ times so that large clusters using this sink on every node don't cause a
+ performance impact on HDFS by rolling simultaneously. The default value is
+ 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in
+ millis should be no less than the number of sink instances times 5.
+
+
The primary use of this class is for logging to HDFS. As it uses
+ {@link org.apache.hadoop.fs.FileSystem} to access the target file system,
+ however, it can be used to write to the local file system, Amazon S3, or any
+ other supported file system. The base path for the sink will determine the
+ file system used. An unqualified path will write to the default file system
+ set by the configuration.
+
+
Not all file systems support the ability to append to files. In file
+ systems without the ability to append to files, only one writer can write to
+ a file at a time. To allow for concurrent writes from multiple daemons on a
+ single host, the source property is used to set unique headers
+ for the log files. The property should be set to the name of
+ the source daemon, e.g. namenode. The value of the
+ source property should typically be the same as the property's
+ prefix. If this property is not set, the source is taken to be
+ unknown.
+
+
Instead of appending to an existing file, by default the sink
+ will create a new file with a suffix of ".<n>", where
+ n is the next lowest integer that isn't already used in a file name,
+ similar to the Hadoop daemon logs. NOTE: the file with the highest
+ sequence number is the newest file, unlike the Hadoop daemon logs.
+
+
For file systems that allow append, the sink supports appending to the
+ existing file instead. If the allow-append property is set to
+ true, the sink will instead append to the existing file on file systems that
+ support appends. By default, the allow-append property is
+ false.
+
+
Note that when writing to HDFS with allow-append set to true,
+ there is a minimum acceptable number of data nodes. If the number of data
+ nodes drops below that minimum, the append will succeed, but reading the
+ data will fail with an IOException in the DataStreamer class. The minimum
+ number of data nodes required for a successful append is generally 2 or
+ 3.
+
+
Note also that when writing to HDFS, the file size information is not
+ updated until the file is closed (at the end of the interval) even though
+ the data is being written successfully. This is a known HDFS limitation that
+ exists because of the performance cost of updating the metadata. See
+ HDFS-5478.
+
+
When using this sink in a secure (Kerberos) environment, two additional
+ properties must be set: keytab-key and
+ principal-key. keytab-key should contain the key by
+ which the keytab file can be found in the configuration, for example,
+ yarn.nodemanager.keytab. principal-key should
+ contain the key by which the principal can be found in the configuration,
+ for example, yarn.nodemanager.principal.]]>
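+
+ A hedged hadoop-metrics2.properties sketch for this sink (the prefix
+ "namenode", the instance name "hdfs" and the HDFS path are illustrative):
+
+   *.sink.hdfs.class=org.apache.hadoop.metrics2.sink.RollingFileSystemSink
+   namenode.sink.hdfs.basepath=hdfs://nameservice/metrics
+   namenode.sink.hdfs.roll-interval=1 hour
+   namenode.sink.hdfs.source=namenode
+   namenode.sink.hdfs.ignore-error=false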
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CollectD StatsD plugin).
+
+ To configure this plugin, you will need to add the following
+ entries to your hadoop-metrics2.properties file:
+
+
+ *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
+ [prefix].sink.statsd.server.host=
+ [prefix].sink.statsd.server.port=
+ [prefix].sink.statsd.skip.hostname=true|false (optional)
+ [prefix].sink.statsd.service.name=NameNode (name you want for service)
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Register the MBean using our standard naming convention,
+ "hadoop:service=<serviceName>,name=<nameName>",
+ where <serviceName> and <nameName> are the supplied parameters.
+
+ @param serviceName
+ @param nameName
+ @param theMbean - the MBean to register
+ @return the name used to register the MBean]]>
+
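+ A minimal usage sketch (myBeanImpl is a hypothetical application object that
+ follows the standard MBean conventions):
+
+   ObjectName registered = MBeans.register("NameNode", "NameNodeInfo", myBeanImpl);
+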
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostname or hostname:port. If
+ the specs string is null, defaults to localhost:defaultPort.
+
+ @param specs server specs (see description)
+ @param defaultPort the default port if not specified
+ @return a list of InetSocketAddress objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is used when parts of Hadoop need to know whether to apply
+ single rack vs multi-rack policies, such as during block placement.
+ Such algorithms behave differently if they are on multi-switch systems.
+
+
+ @return true if the mapping thinks that it is on a single switch]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This predicate simply assumes that all mappings not derived from
+ this class are multi-switch.
+ @param mapping the mapping to query
+ @return true if the base class says it is single switch, or the mapping
+ is not derived from this class.]]>
+
+
+
+ It is not mandatory to
+ derive {@link DNSToSwitchMapping} implementations from it, but it is strongly
+ recommended, as it makes it easy for the Hadoop developers to add new methods
+ to this base class that are automatically picked up by all implementations.
+
+
+ This class does not extend the Configured
+ base class, and should not be changed to do so, as it causes problems
+ for subclasses. The constructor of the Configured calls
+ the {@link #setConf(Configuration)} method, which will call into the
+ subclasses before they have been fully constructed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ If a name cannot be resolved to a rack, the implementation
+ should return {@link NetworkTopology#DEFAULT_RACK}. This
+ is what the bundled implementations do, though it is not a formal requirement
+
+ @param names the list of hosts to resolve (can be empty)
+ @return list of resolved network paths.
+ If names is empty, the returned list is also empty]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Calling {@link #setConf(Configuration)} will trigger a
+ re-evaluation of the configuration settings and so be used to
+ set up the mapping script.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will get called in the superclass constructor, so a check is needed
+ to ensure that the raw mapping is defined before trying to relay a null
+ configuration.
+ @param conf]]>
+
+
+
+
+
+
+
+
+
+ It contains a static class RawScriptBasedMapping that performs
+ the work: reading the configuration parameters, executing any defined
+ script, handling errors and such like. The outer
+ class extends {@link CachedDNSToSwitchMapping} to cache the delegated
+ queries.
+
+ This DNS mapper's {@link #isSingleSwitch()} predicate returns
+ true if and only if a script is defined.]]>
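+
+ A hedged configuration sketch (the script path is hypothetical; the key shown
+ is the net.topology.script.file.name property used by recent releases, so
+ treat it as an assumption on older lines):
+
+   Configuration conf = new Configuration();
+   conf.set("net.topology.script.file.name", "/etc/hadoop/conf/topology.sh");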
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text
+ file. The columns are separated by whitespace. The first column is a DNS or
+ IP address and the second column specifies the rack where the address maps.
+
+
+ This class uses the configuration parameter {@code
+ net.topology.table.file.name} to locate the mapping file.
+
+
+ Calls to {@link #resolve(List)} will look up the address as defined in the
+ mapping file. If no entry corresponding to the address is found, the value
+ {@code /default-rack} is returned.
+
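+ A hedged sketch of wiring this mapping up (the file path and host/rack pairs
+ are illustrative; net.topology.node.switch.mapping.impl selects the mapping
+ class):
+
+   # /etc/hadoop/conf/topology.table -- host or IP, whitespace, then rack
+   192.168.1.10              /rack1
+   datanode2.example.com     /rack2
+
+   Configuration conf = new Configuration();
+   conf.set("net.topology.node.switch.mapping.impl",
+       "org.apache.hadoop.net.TableMapping");
+   conf.set("net.topology.table.file.name", "/etc/hadoop/conf/topology.table");
+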
+
+ Software systems of any significant complexity require mechanisms for data
+interchange with the outside world. These interchanges typically involve the
+marshaling and unmarshaling of logical units of data to and from data streams
+(files, network connections, memory buffers etc.). Applications usually have
+some code for serializing and deserializing the data types that they manipulate
+embedded in them. The work of serialization has several features that make
+automatic code generation for it worthwhile. Given a particular output encoding
+(binary, XML, etc.), serialization of primitive types and simple compositions
+of primitives (structs, vectors etc.) is a very mechanical task. Manually
+written serialization code can be susceptible to bugs especially when records
+have a large number of fields or a record definition changes between software
+versions. Lastly, it can be very useful for applications written in different
+programming languages to be able to share and interchange data. This can be
+made a lot easier by describing the data records manipulated by these
+applications in a language agnostic manner and using the descriptions to derive
+implementations of serialization in multiple target languages.
+
+This document describes Hadoop Record I/O, a mechanism that is aimed
+at
+
+
enabling the specification of simple serializable data types (records)
+
enabling the generation of code in multiple target languages for
+marshaling and unmarshaling such types
+
providing target language specific support that will enable application
+programmers to incorporate generated code into their applications
+
+
+The goals of Hadoop Record I/O are similar to those of mechanisms such as XDR,
+ASN.1, PADS and ICE. While these systems all include a DDL that enables
+the specification of most record types, they differ widely in what else they
+focus on. The focus in Hadoop Record I/O is on data marshaling and
+multi-lingual support. We take a translator-based approach to serialization.
+Hadoop users have to describe their data in a simple data description
+language. The Hadoop DDL translator rcc generates code that users
+can invoke in order to read/write their data from/to simple stream
+abstractions. Next we list explicitly some of the goals and non-goals of
+Hadoop Record I/O.
+
+
+
Goals
+
+
+
Support for commonly used primitive types. Hadoop should include as
+primitives commonly used builtin types from programming languages we intend to
+support.
+
+
Support for common data compositions (including recursive compositions).
+Hadoop should support widely used composite types such as structs and
+vectors.
+
+
Code generation in multiple target languages. Hadoop should be capable of
+generating serialization code in multiple target languages and should be
+easily extensible to new target languages. The initial target languages are
+C++ and Java.
+
+
Support for generated target languages. Hadoop should include support
+in the form of headers, libraries, packages for supported target languages
+that enable easy inclusion and use of generated code in applications.
+
+
Support for multiple output encodings. Candidates include
+packed binary, comma-separated text, XML etc.
+
+
Support for specifying record types in a backwards/forwards compatible
+manner. This will probably be in the form of support for optional fields in
+records. This version of the document does not include a description of the
+planned mechanism, we intend to include it in the next iteration.
+
+
+
+
Non-Goals
+
+
+
Serializing existing arbitrary C++ classes.
+
Serializing complex data structures such as trees, linked lists etc.
+
Built-in indexing schemes, compression, or check-sums.
+
Dynamic construction of objects from an XML schema.
+
+
+The remainder of this document describes the features of Hadoop record I/O
+in more detail. Section 2 describes the data types supported by the system.
+Section 3 lays out the DDL syntax with some examples of simple records.
+Section 4 describes the process of code generation with rcc. Section 5
+describes target language mappings and support for Hadoop types. We include a
+fairly complete description of C++ mappings with intent to include Java and
+others in upcoming iterations of this document. The last section talks about
+supported output encodings.
+
+
+
Data Types and Streams
+
+This section describes the primitive and composite types supported by Hadoop.
+We aim to support a set of types that can be used to simply and efficiently
+express a wide range of record types in different programming languages.
+
+
Primitive Types
+
+For the most part, the primitive types of Hadoop map directly to primitive
+types in high level programming languages. Special cases are the
+ustring (a Unicode string) and buffer types, which we believe
+find wide use and which are usually implemented in library code and not
+available as language built-ins. Hadoop also supplies these via library code
+when a target language built-in is not present and there is no widely
+adopted "standard" implementation. The complete list of primitive types is:
+
+
+
byte: An 8-bit unsigned integer.
+
boolean: A boolean value.
+
int: A 32-bit signed integer.
+
long: A 64-bit signed integer.
+
float: A single precision floating point number as described by
+ IEEE-754.
+
double: A double precision floating point number as described by
+ IEEE-754.
+
ustring: A string consisting of Unicode characters.
+
buffer: An arbitrary sequence of bytes.
+
+
+
+
Composite Types
+Hadoop supports a small set of composite types that enable the description
+of simple aggregate types and containers. A composite type is serialized
+by sequentially serializing its constituent elements. The supported
+composite types are:
+
+
+
+
record: An aggregate type like a C-struct. This is a list of
+typed fields that are together considered a single unit of data. A record
+is serialized by sequentially serializing its constituent fields. In addition
+to serialization a record has comparison operations (equality and less-than)
+implemented for it, these are defined as memberwise comparisons.
+
+
vector: A sequence of entries of the same data type, primitive
+or composite.
+
+
map: An associative container mapping instances of a key type to
+instances of a value type. The key and value types may themselves be primitive
+or composite types.
+
+
+
+
Streams
+
+Hadoop generates code for serializing and deserializing record types to
+abstract streams. For each target language Hadoop defines very simple input
+and output stream interfaces. Application writers can usually develop
+concrete implementations of these by putting a one method wrapper around
+an existing stream implementation.
+
+
+
DDL Syntax and Examples
+
+We now describe the syntax of the Hadoop data description language. This is
+followed by a few examples of DDL usage.
+
+
+
+A DDL file describes one or more record types. It begins with zero or
+more include declarations, a single mandatory module declaration
+followed by zero or more class declarations. The semantics of each of
+these declarations are described below:
+
+
+
+
include: An include declaration specifies a DDL file to be
+referenced when generating code for types in the current DDL file. Record types
+in the current compilation unit may refer to types in all included files.
+File inclusion is recursive. An include does not trigger code
+generation for the referenced file.
+
+
module: Every Hadoop DDL file must have a single module
+declaration that follows the list of includes and precedes all record
+declarations. A module declaration identifies a scope within which
+the names of all types in the current file are visible. Module names are
+mapped to C++ namespaces, Java packages etc. in generated code.
+
+
class: Records types are specified through class
+declarations. A class declaration is like a Java class declaration.
+It specifies a named record type and a list of fields that constitute records
+of the type. Usage is illustrated in the following examples.
+
+
+
+
Examples
+
+
+
A simple DDL file links.jr with just one record declaration.
+
+module links {
+ class Link {
+ ustring URL;
+ boolean isRelative;
+ ustring anchorText;
+ };
+}
+
+
+The Hadoop translator is written in Java. Invocation is done by executing a
+wrapper shell script named rcc. It takes a list of
+record description files as a mandatory argument and an
+optional language argument, --language or -l (the default
+is Java). Thus a typical invocation would look like:
+
+$ rcc -l C++ ...
+
+
+
+
Target Language Mappings and Support
+
+For all target languages, the unit of code generation is a record type.
+For each record type, Hadoop generates code for serialization and
+deserialization, record comparison and access to record members.
+
+
C++
+
+Support for including Hadoop generated C++ code in applications comes in the
+form of a header file recordio.hh which needs to be included in source
+that uses Hadoop types and a library librecordio.a which applications need
+to be linked with. The header declares the Hadoop C++ namespace which defines
+appropriate types for the various primitives, the basic interfaces for
+records and streams and enumerates the supported serialization encodings.
+Declarations of these interfaces and a description of their semantics follow:
+
+
RecFormat: An enumeration of the serialization encodings supported
+by this implementation of Hadoop.
+
+
InStream: A simple abstraction for an input stream. This has a
+single public read method that reads n bytes from the stream into
+the buffer buf. Has the same semantics as a blocking read system
+call. Returns the number of bytes read or -1 if an error occurs.
+
+
OutStream: A simple abstraction for an output stream. This has a
+single write method that writes n bytes to the stream from the
+buffer buf. Has the same semantics as a blocking write system
+call. Returns the number of bytes written or -1 if an error occurs.
+
+
RecordReader: A RecordReader reads records one at a time from
+an underlying stream in a specified record format. The reader is instantiated
+with a stream and a serialization format. It has a read method that
+takes an instance of a record and deserializes the record from the stream.
+
+
RecordWriter: A RecordWriter writes records one at a
+time to an underlying stream in a specified record format. The writer is
+instantiated with a stream and a serialization format. It has a
+write method that takes an instance of a record and serializes the
+record to the stream.
+
+
Record: The base class for all generated record types. This has two
+public methods type and signature that return the typename and the
+type signature of the record.
+
+
+
+Two files are generated for each record file (note: not for each record). If a
+record file is named "name.jr", the generated files are
+"name.jr.cc" and "name.jr.hh" containing serialization
+implementations and record type declarations respectively.
+
+For each record in the DDL file, the generated header file will contain a
+class definition corresponding to the record type, method definitions for the
+generated type will be present in the '.cc' file. The generated class will
+inherit from the abstract class hadoop::Record. The DDL file's
+module declaration determines the namespace the record belongs to.
+Each '.' delimited token in the module declaration results in the
+creation of a namespace. For instance, the declaration module docs.links
+results in the creation of a docs namespace and a nested
+docs::links namespace. In the preceding examples, the Link class
+is placed in the links namespace. The header file corresponding to
+the links.jr file will contain:
+
+
+namespace links {
+ class Link : public hadoop::Record {
+ // ....
+ };
+};
+
+
+Each field within the record will cause the generation of a private member
+declaration of the appropriate type in the class declaration, and one or more
+accessor methods. The generated class will implement the serialize and
+deserialize methods defined in hadoop::Record. It will also
+implement the inspection methods type and signature from
+hadoop::Record. A default constructor and virtual destructor will also
+be generated. Serialization code will read/write records into streams that
+implement the hadoop::InStream and the hadoop::OutStream interfaces.
+
+For each member of a record an accessor method is generated that returns
+either the member or a reference to the member. For members that are returned
+by value, a setter method is also generated. This is true for primitive
+data members of the types byte, int, long, boolean, float and
+double. For example, for an int field called MyField the following
+code is generated.
+
+
+
+For a ustring, buffer, or composite field, the generated code
+only contains accessors that return a reference to the field. A const
+and a non-const accessor are generated. For example:
+
+
+
+Code generation for Java is similar to that for C++. A Java class is generated
+for each record type with private members corresponding to the fields. Getters
+and setters for fields are also generated. Some differences arise in the
+way comparison is expressed and in the mapping of modules to packages and
+classes to files. For equality testing, an equals method is generated
+for each record type. As per Java requirements a hashCode method is also
+generated. For comparison a compareTo method is generated for each
+record type. This has the semantics as defined by the Java Comparable
+interface, that is, the method returns a negative integer, zero, or a positive
+integer as the invoked object is less than, equal to, or greater than the
+comparison parameter.
+
+A .java file is generated per record type as opposed to per DDL
+file as in C++. The module declaration translates to a Java
+package declaration. The module name maps to an identical Java package
+name. In addition to this mapping, the DDL compiler creates the appropriate
+directory hierarchy for the package and places the generated .java
+files in the correct directories.
+
+
Mapping Summary
+
+
+DDL Type       C++ Type         Java Type
+
+boolean        bool             boolean
+byte           int8_t           byte
+int            int32_t          int
+long           int64_t          long
+float          float            float
+double         double           double
+ustring        std::string      java.lang.String
+buffer         std::string      org.apache.hadoop.record.Buffer
+class type     class type       class type
+vector         std::vector      java.util.ArrayList
+map            std::map         java.util.TreeMap
+
+
+
Data encodings
+
+This section describes the format of the data encodings supported by Hadoop.
+Currently, three data encodings are supported, namely binary, CSV and XML.
+
+
Binary Serialization Format
+
+The binary data encoding format is fairly dense. Serialization of composite
+types is simply defined as a concatenation of serializations of the constituent
+elements (lengths are included in vectors and maps).
+
+Composite types are serialized as follows:
+
+
class: Sequence of serialized members.
+
vector: The number of elements serialized as an int. Followed by a
+sequence of serialized elements.
+
map: The number of key value pairs serialized as an int. Followed
+by a sequence of serialized (key,value) pairs.
+
+
+Serialization of primitives is more interesting, with a zero compression
+optimization for integral types and normalization to UTF-8 for strings.
+Primitive types are serialized as follows:
+
+
+
byte: Represented by 1 byte, as is.
+
boolean: Represented by 1-byte (0 or 1)
+
int/long: Integers and longs are serialized zero compressed.
+Represented as 1-byte if -120 <= value < 128. Otherwise, serialized as a
+sequence of 2-5 bytes for ints, 2-9 bytes for longs. The first byte represents
+the number of trailing bytes, N, as the negative number (-120-N). For example,
+the number 1024 (0x400) is represented by the byte sequence 'x86 x04 x00'.
+This doesn't help much for 4-byte integers but does a reasonably good job with
+longs without bit twiddling.
+
float/double: Serialized in IEEE 754 single and double precision
+format in network byte order. This is the format used by Java.
+
ustring: Serialized as 4-byte zero compressed length followed by
+data encoded as UTF-8. Strings are normalized to UTF-8 regardless of native
+language representation.
+
buffer: Serialized as a 4-byte zero compressed length followed by the
+raw bytes in the buffer.
+
+
+
+
CSV Serialization Format
+
+The CSV serialization format has a lot more structure than the "standard"
+Excel CSV format, but we believe the additional structure is useful because
+
+
+
it makes parsing a lot easier without detracting too much from legibility
+
the delimiters around composites make it obvious when one is reading a
+sequence of Hadoop records
+
+
+Serialization formats for the various types are detailed in the grammar that
+follows. The notable feature of the formats is the use of delimiters for
+indicating certain field types.
+
+
+
A string field begins with a single quote (').
+
A buffer field begins with a sharp (#).
+
A class, vector or map begins with 's{', 'v{' or 'm{' respectively and
+ends with '}'.
+
+
+The CSV format can be described by the following grammar:
+
+
+
+The XML serialization format is the same used by Apache XML-RPC
+(http://ws.apache.org/xmlrpc/types.html). This is an extension of the original
+XML-RPC format and adds some additional data types. Not all record I/O types
+are directly expressible in this format, and access to a DDL is required in
+order to convert these to valid types. All types primitive or composite are
+represented by <value> elements. The particular XML-RPC type is
+indicated by a nested element in the <value> element. The encoding for
+records is always UTF-8. Primitive types are serialized as follows:
+
+
+
byte: XML tag <ex:i1>. Values: 1-byte unsigned
+integers represented in US-ASCII
+
boolean: XML tag <boolean>. Values: "0" or "1"
+
int: XML tags <i4> or <int>. Values: 4-byte
+signed integers represented in US-ASCII.
+
long: XML tag <ex:i8>. Values: 8-byte signed integers
+represented in US-ASCII.
+
float: XML tag <ex:float>. Values: Single precision
+floating point numbers represented in US-ASCII.
+
double: XML tag <double>. Values: Double precision
+floating point numbers represented in US-ASCII.
+
ustring: XML tag <string>. Values: String values
+represented as UTF-8. XML does not permit all Unicode characters in literal
+data. In particular, NULLs and control chars are not allowed. Additionally,
+XML processors are required to replace carriage returns with line feeds and to
+replace CRLF sequences with line feeds. Programming languages that we work
+with do not impose these restrictions on string types. To work around these
+restrictions, disallowed characters and CRs are percent escaped in strings.
+The '%' character is also percent escaped.
+
buffer: XML tag <string>. Values: Arbitrary binary
+data. Represented as hexBinary, each byte is replaced by its 2-byte
+hexadecimal representation.
+
+
+Composite types are serialized as follows:
+
+
+
class: XML tag <struct>. A struct is a sequence of
+<member> elements. Each <member> element has a <name>
+element and a <value> element. The <name> is a string that must
+match /[a-zA-Z][a-zA-Z0-9_]*/. The value of the member is represented
+by a <value> element.
+
+
vector: XML tag <array>. An <array> contains a
+single <data> element. The <data> element is a sequence of
+<value> elements each of which represents an element of the vector.
+
+
map: XML tag <array>. Same as vector.
+
+
+
+For example:
+
+
+class {
+ int MY_INT; // value 5
+ vector MY_VEC; // values 0.1, -0.89, 2.45e4
+ buffer MY_BUF; // value '\00\n\tabc%'
+}
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This task takes the given record definition files and compiles them into
+ java or c++
+ files. It is then up to the user to compile the generated files.
+
+
The task requires the file or the nested fileset element to be
+ specified. Optional attributes are language (set the output
+ language, default is "java"),
+ destdir (name of the destination directory for generated java/c++
+ code, default is ".") and failonerror (specifies error handling
+ behavior. default is true).
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Avro.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (cause==null ? null : cause.toString()) (which
+ typically contains the class and detail message of cause).
+ @param cause the cause (which is saved for later retrieval by the
+ {@link #getCause()} method). (A null value is
+ permitted, and indicates that the cause is nonexistent or
+ unknown.)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ mapping
+ and mapping]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ /host@realm.
+ @param principalName principal name of format as described above
+ @return host name if the string conforms to the above format, else null]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ "jack"
+
+ @param userName
+ @return userName without login method]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the return type of the run method
+ @param action the method to execute
+ @return the value from the run method]]>
+
+
+
+
+
+
+
+ the return type of the run method
+ @param action the method to execute
+ @return the value from the run method
+ @throws IOException if the action throws an IOException
+ @throws Error if the action throws an Error
+ @throws RuntimeException if the action throws a RuntimeException
+ @throws InterruptedException if the action throws an InterruptedException
+ @throws UndeclaredThrowableException if the action throws something else]]>
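+
+ A minimal doAs sketch (the user name and path are placeholders; the action
+ type shown is PrivilegedExceptionAction so checked exceptions can propagate):
+
+   UserGroupInformation ugi = UserGroupInformation.createRemoteUser("jack");
+   FileStatus[] listing = ugi.doAs(new PrivilegedExceptionAction<FileStatus[]>() {
+     @Override
+     public FileStatus[] run() throws IOException {
+       FileSystem fs = FileSystem.get(new Configuration());
+       return fs.listStatus(new Path("/user/jack"));
+     }
+   });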
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (cause==null ? null : cause.toString()) (which
+ typically contains the class and detail message of cause).
+ @param cause the cause (which is saved for later retrieval by the
+ {@link #getCause()} method). (A null value is
+ permitted, and indicates that the cause is nonexistent or
+ unknown.)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ does not provide the stack trace for security purposes.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A User-Agent String is considered to be a browser if it matches
+ any of the regex patterns from browser-useragent-regex; the default
+ behavior is to consider everything a browser that matches the following:
+ "^Mozilla.*,^Opera.*". Subclasses can optionally override
+ this method to use different behavior.
+
+ @param userAgent The User-Agent String, or null if there isn't one
+ @return true if the User-Agent String refers to a browser, false if not]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The type of the token identifier]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ T extends TokenIdentifier]]>
+
+
+
+
+
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ An instance of the default {@link DelegationTokenAuthenticator} will be
+ used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL using the default
+ {@link DelegationTokenAuthenticator} class.
+
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+
+
+
+
+
+
+
+ The default class is {@link KerberosDelegationTokenAuthenticator}
+
+ @return the delegation token authenticator class to use as default.]]>
+
+
+
+
+
+
+ This method is provided to enable WebHDFS backwards compatibility.
+
+ @param useQueryString TRUE if the token is transmitted in the
+ URL query string, FALSE if the delegation token is transmitted
+ using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP
+ header.]]>
+
+
+
+
+ TRUE if the token is transmitted in the URL query
+ string, FALSE if the delegation token is transmitted using the
+ {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator. If the doAs parameter is not NULL,
+ the request will be done on behalf of the specified doAs user.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @param doAs user to do the request on behalf of, if NULL the request is
+ as self.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+ DelegationTokenAuthenticatedURL is a
+ {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token
+ functionality.
+
+ The authentication mechanisms supported by default are Hadoop Simple
+ authentication (also known as pseudo authentication) and Kerberos SPNEGO
+ authentication.
+
+ Additional authentication mechanisms can be supported via {@link
+ DelegationTokenAuthenticator} implementations.
+
+ The default {@link DelegationTokenAuthenticator} is the {@link
+ KerberosDelegationTokenAuthenticator} class which supports
+ automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via
+ the {@link PseudoDelegationTokenAuthenticator} class.
+
+ AuthenticatedURL instances are not thread-safe.]]>
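+
+ A minimal usage sketch (the URL is a placeholder):
+
+   DelegationTokenAuthenticatedURL.Token token =
+       new DelegationTokenAuthenticatedURL.Token();
+   DelegationTokenAuthenticatedURL aUrl = new DelegationTokenAuthenticatedURL();
+   URL url = new URL("http://example-host:8080/webhdfs/v1/?op=LISTSTATUS");
+   HttpURLConnection conn = aUrl.openConnection(url, token);
+   // ... read the response, then reuse the same token for follow-up calls ...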
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KerberosDelegationTokenAuthenticator provides support for
+ Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation
+ Token operations.
+
+ It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP
+ endpoint does not trigger a SPNEGO authentication]]>
+
+
+
+
+
+
+
+
+ PseudoDelegationTokenAuthenticator provides support for
+ Hadoop's pseudo authentication mechanism that accepts
+ the user name specified as a query string parameter and support for Hadoop
+ Delegation Token operations.
+
+ This mimics the model of Hadoop Simple authentication trusting the
+ {@link UserGroupInformation#getCurrentUser()} value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ live.
+ @return a (snapshotted) map of blocker name->description values]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ Do nothing if the service is null or not
+ in a state in which it can be/needs to be stopped.
+
+ The service state is checked before the operation begins.
+ This process is not thread safe.
+ @param service a service or null]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Any long-lived operation here will prevent the service state
+ change from completing in a timely manner.
+
If another thread is somehow invoked from the listener, and
+ that thread invokes the methods of the service (including
+ subclass-specific methods), there is a risk of a deadlock.
+
+
+
+ @param service the service that has changed.]]>
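+
+ As an illustration of the constraint above, a listener sketch that only records the
+ transition and returns immediately (the LOG field is an assumed SLF4J logger):
+
+   class LoggingStateListener implements ServiceStateChangeListener {
+     @Override
+     public void stateChanged(Service service) {
+       // Keep the callback short so the state change completes promptly;
+       // defer any heavy work to another thread or executor.
+       LOG.info("{} entered state {}", service.getName(), service.getServiceState());
+     }
+   }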
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The base implementation logs all arguments at the debug level,
+ then returns the passed in config unchanged.]]>
+
+
+
+
+
+
+ The action is to signal success by returning the exit code 0.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is called before {@link #init(Configuration)};
+ Any non-null configuration that is returned from this operation
+ becomes the one that is passed on to that {@link #init(Configuration)}
+ operation.
+
+ This permits implementations to change the configuration before
+ the init operation. As the ServiceLauncher only creates
+ an instance of the base {@link Configuration} class, it is
+ recommended to instantiate any subclass (such as YarnConfiguration)
+ that injects new resources.
+
+ @param config the initial configuration build up by the
+ service launcher.
+ @param args list of arguments passed to the command line
+ after any launcher-specific commands have been stripped.
+ @return the configuration to init the service with.
+ Recommended: pass down the config parameter with any changes
+ @throws Exception any problem]]>
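+
+ A sketch of a bindArgs override that swaps in a richer configuration subclass before
+ init; YarnConfiguration is used purely as an illustration of a resource-injecting subclass:
+
+   @Override
+   public Configuration bindArgs(Configuration config, List<String> args)
+       throws Exception {
+     // Replace the plain Configuration created by the launcher with a subclass
+     // that pulls in its own default resources, preserving anything already set.
+     return new YarnConfiguration(config);
+   }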
+
+
+
+
+
+
+ The return value becomes the exit code of the launched process.
+
+ If an exception is raised, the policy is:
+
+
Any subset of {@link org.apache.hadoop.util.ExitUtil.ExitException}:
+ the exception is passed up unmodified.
+
+
Any exception which implements
+ {@link org.apache.hadoop.util.ExitCodeProvider}:
+ A new {@link ServiceLaunchException} is created with the exit code
+ and message of the thrown exception; the thrown exception becomes the
+ cause.
+
Any other exception: a new {@link ServiceLaunchException} is created
+ with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and
+ the message of the original exception (which becomes the cause).
+
+ @return the exit code
+ @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed
+ up as the exit code and error text.
+ @throws Exception any exception to report. If it provides an exit code
+ this is used in a wrapping exception.]]>
+
+
+
+
+ The command line options will be passed down before the
+ {@link Service#init(Configuration)} operation is invoked via an
+ invocation of {@link LaunchableService#bindArgs(Configuration, List)}
+ After the service has been successfully started via {@link Service#start()}
+ the {@link LaunchableService#execute()} method is called to execute the
+ service. When this method returns, the service launcher will exit, using
+ the return code from the method as its exit code.]]>
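+
+ A hedged sketch of a launchable service tying the pieces together (class name hypothetical):
+
+   public class DemoService extends AbstractService implements LaunchableService {
+     public DemoService() { super("DemoService"); }
+
+     @Override
+     public Configuration bindArgs(Configuration config, List<String> args) {
+       return config;   // no argument handling in this sketch
+     }
+
+     @Override
+     public int execute() throws Exception {
+       // Runs after start(); the return value becomes the process exit code.
+       return 0;
+     }
+   }
+
+ The service would then typically be launched through the ServiceLauncher entry point,
+ with the service class name passed on the command line.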
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 400 Bad Request}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 401 Unauthorized}]]>
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 403: Forbidden}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 404: Not Found}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 405: Not allowed}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 406: Not Acceptable}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 408: Request Timeout}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 409: Conflict}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 500 Internal Server Error}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 501: Not Implemented}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 503 Service Unavailable}]]>
+
+
+
+
+
+ If raised, this is expected to be raised server-side and likely due
+ to client/server version incompatibilities.
+
+ Approximate HTTP equivalent: {@code 505: Version Not Supported}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Codes with a YARN prefix are YARN-related.
+
+ Many of the exit codes are designed to resemble HTTP error codes,
+ squashed into a single byte. e.g. 44, "not found" is the equivalent
+ of 404. The various 2XX HTTP error codes aren't followed;
+ the Unix standard of "0" for success is used.
+
+ 0-10: general command issues
+ 30-39: equivalent to the 3XX responses, where those responses are
+ considered errors by the application.
+ 40-49: client-side/CLI/config problems
+ 50-59: service-side problems.
+ 60+ : application specific error codes
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This uses {@link String#format(String, Object...)}
+ to build the formatted exception in the ENGLISH locale.
+
+ If the last argument is a throwable, it becomes the cause of the exception.
+ It will also be used as a parameter for the format.
+ @param exitCode exit code
+ @param format format for message to use in exception
+ @param args list of arguments]]>
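+
+ For example, a not-found condition might be reported like this; the exit-code constant
+ and the configPath/ioe variables are assumptions made for the sake of the sketch:
+
+   // The message is built with String.format in the ENGLISH locale; the trailing
+   // IOException is used both as a format argument and as the exception cause.
+   throw new ServiceLaunchException(LauncherExitCodes.EXIT_NOT_FOUND,
+       "Required path %s is missing: %s", configPath, ioe);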
+
+
+
+
+ When caught by the ServiceLauncher, it will convert that
+ into a process exit code.
+
+ The {@link #ServiceLaunchException(int, String, Object...)} constructor
+ generates formatted exceptions.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Clients and/or applications can use the provided Progressable
+ to explicitly report progress to the Hadoop framework. This is especially
+ important for operations which take a significant amount of time since,
+ in lieu of the reported progress, the framework has to assume that an error
+ has occurred and time out the operation.]]>
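+
+ A small sketch of supplying a Progressable when writing a file; the fs and path
+ variables are placeholders:
+
+   FSDataOutputStream out = fs.create(path, new Progressable() {
+     @Override
+     public void progress() {
+       // Called back during the long-running write; forward this to a task reporter
+       // (or simply treat it as a keep-alive signal).
+     }
+   });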
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Class is to be obtained
+ @return the correctly typed Class of the given object.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ kill -0 command or equivalent]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param parent File parent directory
+ @param basename String script file basename
+ @return File referencing the script in the directory]]>
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param basename String script file basename
+ @return String script file name]]>
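+
+ For instance, a one-line sketch (the basename is arbitrary):
+
+   // Resolves to "launcher.cmd" on Windows and "launcher.sh" elsewhere.
+   String script = Shell.appendScriptExtension("launcher");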
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ IOException.
+ @return the path to {@link #WINUTILS_EXE}
+ @throws RuntimeException if the path is not resolvable]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell.
+ @return the thread that ran runCommand() that spawned this shell
+ or null if no thread is waiting for this shell to complete]]>
+
+
+
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param cmd shell command to execute.
+ @return the output of the executed command.]]>
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @param timeout time in milliseconds after which the script should be marked as timed out
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+ Shell processes.
+ Iterates through a map of all currently running Shell
+ processes and destroys them one by one. This method is thread safe.]]>
+
+
+
+
+ Shell objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CreateProcess synchronization object.]]>
+
+
+
+
+ os.name property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Important: caller must check for this value being null.
+ The lack of such checks has led to many support issues being raised.
+
+ @deprecated use one of the exception-raising getter methods,
+ specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell can be used to run shell commands like du or
+ df. It also offers facilities to gate commands by
+ time-intervals.]]>
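+
+ A minimal sketch of running a command through the ShellCommandExecutor helper; the
+ command itself is only an example:
+
+   Shell.ShellCommandExecutor exec =
+       new Shell.ShellCommandExecutor(new String[] {"df", "-h", "/"});
+   exec.execute();                    // runs the command; throws on a non-zero exit code
+   String report = exec.getOutput();  // captured stdout of the command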
+
+
+
+
+
+
+
+ ShutdownHookManager singleton.
+
+ @return ShutdownHookManager singleton.]]>
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook.]]>
+
+
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook
+ @param timeout timeout of the shutdownHook
+ @param unit unit of the timeout TimeUnit]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ShutdownHookManager enables running shutdownHook
+ in a deterministic order, higher priority first.
+
+ The JVM runs ShutdownHooks in a non-deterministic order or in parallel.
+ This class registers a single JVM shutdownHook and run all the
+ shutdownHooks registered to it (to this class) in order based on their
+ priority.
+
+ Unless a hook was registered with a shutdown timeout explicitly set through
+ {@link #addShutdownHook(Runnable, int, long, TimeUnit)},
+ the shutdown time allocated to it is set by the configuration option
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in
+ {@code core-site.xml}, with a default value of
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT}
+ seconds.]]>
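+
+ A usage sketch registering a hook with an explicit priority and timeout (the values and
+ the cache object are illustrative):
+
+   ShutdownHookManager.get().addShutdownHook(
+       () -> cache.flush(),     // work to run at JVM shutdown (cache is hypothetical)
+       10,                      // priority: higher-priority hooks run first
+       30, TimeUnit.SECONDS);   // per-hook timeout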
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Tool, is the standard for any Map-Reduce tool/application.
+ The tool/application should delegate the handling of
+
+ standard command-line options to {@link ToolRunner#run(Tool, String[])}
+ and only handle its custom arguments.
+
+
Here is how a typical Tool is implemented:
+
+ public class MyApp extends Configured implements Tool {
+
+ public int run(String[] args) throws Exception {
+ // Configuration processed by ToolRunner
+ Configuration conf = getConf();
+
+ // Create a JobConf using the processed conf
+ JobConf job = new JobConf(conf, MyApp.class);
+
+ // Process custom command-line options
+ Path in = new Path(args[1]);
+ Path out = new Path(args[2]);
+
+ // Specify various job-specific parameters
+ job.setJobName("my-app");
+ job.setInputPath(in);
+ job.setOutputPath(out);
+ job.setMapperClass(MyMapper.class);
+ job.setReducerClass(MyReducer.class);
+
+ // Submit the job, then poll for progress until the job is complete
+ RunningJob runningJob = JobClient.runJob(job);
+ if (runningJob.isSuccessful()) {
+ return 0;
+ } else {
+ return 1;
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ // Let ToolRunner handle generic command-line options
+ int res = ToolRunner.run(new Configuration(), new MyApp(), args);
+
+ System.exit(res);
+ }
+ }
+
+
+ @see GenericOptionsParser
+ @see ToolRunner]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Tool by {@link Tool#run(String[])}, after
+ parsing with the given generic arguments. Uses the given
+ Configuration, or builds one if null.
+
+ Sets the Tool's configuration with the possibly modified
+ version of the conf.
+
+ @param conf Configuration for the Tool.
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+ Tool with its Configuration.
+
+ Equivalent to run(tool.getConf(), tool, args).
+
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ToolRunner can be used to run classes implementing
+ Tool interface. It works in conjunction with
+ {@link GenericOptionsParser} to parse the
+
+ generic hadoop command line arguments and modifies the
+ Configuration of the Tool. The
+ application-specific options are passed along without being modified.
+
+
+ @see Tool
+ @see GenericOptionsParser]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Bloom filter, as defined by Bloom in 1970.
+
+ The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
+ the networking research community in the past decade thanks to the bandwidth efficiencies that it
+ offers for the transmission of set membership information between networked hosts. A sender encodes
+ the information into a bit vector, the Bloom filter, that is more compact than a conventional
+ representation. Computation and space costs for construction are linear in the number of elements.
+ The receiver uses the filter to test whether various elements are members of the set. Though the
+ filter will occasionally return a false positive, it will never return a false negative. When creating
+ the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
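+
+ A small usage sketch of the Hadoop implementation; the sizing parameters are arbitrary:
+
+   BloomFilter filter = new BloomFilter(1024, 3, Hash.MURMUR_HASH);
+   filter.add(new Key("alice".getBytes(StandardCharsets.UTF_8)));
+   // May return a false positive, but never a false negative.
+   boolean maybePresent =
+       filter.membershipTest(new Key("alice".getBytes(StandardCharsets.UTF_8)));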
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this counting Bloom filter.
+
+ Invariant: nothing happens if the specified key does not belong to this counting Bloom filter.
+ @param key The key to remove.]]>
+
+
+
+
+
+
+
+
+
+
+
+ key -> count map.
+
NOTE: due to the bucket size of this filter, inserting the same
+ key more than 15 times will cause an overflow at all filter positions
+ associated with this key, and it will significantly increase the error
+ rate for this and other keys. For this reason the filter can only be
+ used to store small count values 0 <= N << 15.
+ @param key key to be tested
+ @return 0 if the key is not present. Otherwise, a positive value v will
+ be returned such that v == count with probability equal to the
+ error rate of this filter, and v > count otherwise.
+ Additionally, if the filter experienced an underflow as a result of
+ {@link #delete(Key)} operation, the return value may be lower than the
+ count with the probability of the false negative rate of such
+ filter.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ counting Bloom filter, as defined by Fan et al. in a ToN
+ 2000 paper.
+
+ A counting Bloom filter is an improvement to a standard Bloom filter as it
+ allows dynamic additions and deletions of set membership information. This
+ is achieved through the use of a counting vector instead of a bit vector.
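+
+ A sketch showing the dynamic addition and deletion support (parameters arbitrary):
+
+   CountingBloomFilter cbf = new CountingBloomFilter(1024, 3, Hash.MURMUR_HASH);
+   Key k = new Key("bob".getBytes(StandardCharsets.UTF_8));
+   cbf.add(k);
+   cbf.add(k);
+   int approx = cbf.approximateCount(k);  // expected to be about 2
+   cbf.delete(k);                         // decrements the counters for this key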
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Builds an empty Dynamic Bloom filter.
+ @param vectorSize The number of bits in the vector.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).
+ @param nr The threshold for the maximum number of keys to record in a
+ dynamic Bloom filter row.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dynamic Bloom filter, as defined in the INFOCOM 2006 paper.
+
+ A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but
+ each of the s rows is a standard Bloom filter. The creation
+ process of a DBF is iterative. At the start, the DBF is a 1 * m
+ bit matrix, i.e., it is composed of a single standard Bloom filter.
+ It assumes that nr elements are recorded in the
+ initial bit vector, where nr <= n (n is
+ the cardinality of the set A to record in the filter).
+
+ As the size of A grows during the execution of the application,
+ several keys must be inserted in the DBF. When inserting a key into the DBF,
+ one must first get an active Bloom filter in the matrix. A Bloom filter is
+ active when the number of recorded keys, nr, is
+ strictly less than the current cardinality of A, n.
+ If an active Bloom filter is found, the key is inserted and
+ nr is incremented by one. On the other hand, if there
+ is no active Bloom filter, a new one is created (i.e., a new row is added to
+ the matrix) according to the current size of A and the element
+ is added in this new Bloom filter and the nr value of
+ this new Bloom filter is set to one. A given key is said to belong to the
+ DBF if the k positions are set to one in one of the matrix rows.
+
+
+
+
+
+
+
+
+ Builds a hash function that must obey a given maximum number of returned values and a highest value.
+ @param maxValue The maximum highest returned value.
+ @param nbHash The number of resulting hashed values.
+ @param hashType type of the hashing function (see {@link Hash}).]]>
+
+
+
+
+ this hash function. A NOOP]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The idea is to randomly select a bit to reset.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will generate the minimum
+ number of false negatives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will remove the maximum number
+ of false positives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will, at the same time, remove
+ the maximum number of false positives while minimizing the number of false
+ negatives generated.]]>
+
+
+
+
+ Originally created by
+ European Commission One-Lab Project 034819.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this retouched Bloom filter.
+
+ Invariant: if the false positive is null, nothing happens.
+ @param key The false positive key to add.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param coll The collection of false positives.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The list of false positives.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The array of false positives.]]>
+
+
+
+
+
+
+ this retouched Bloom filter.
+ @param scheme The selective clearing scheme to apply.]]>
+
+
+
+
+
+
+
+
+
+
+
+ retouched Bloom filter, as defined in the CoNEXT 2006 paper.
+
+ It allows the removal of selected false positives at the cost of introducing
+ random false negatives, and with the benefit of eliminating some random false
+ positives at the same time.
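+
+ A hedged sketch; the RemoveScheme.RANDOM constant is assumed to be one of the
+ selective-clearing schemes described above:
+
+   RetouchedBloomFilter rbf = new RetouchedBloomFilter(1024, 3, Hash.MURMUR_HASH);
+   Key wanted = new Key("wanted".getBytes(StandardCharsets.UTF_8));
+   Key noisy = new Key("false-hit".getBytes(StandardCharsets.UTF_8));
+   rbf.add(wanted);
+   rbf.addFalsePositive(noisy);                          // record a known false positive
+   rbf.selectiveClearing(noisy, RemoveScheme.RANDOM);    // attempt to clear it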
+
+
+
+
+
+
+
+
+
+
+
diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.2.4.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.2.4.xml
new file mode 100644
index 0000000000000..10a4f0d5f16e5
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.2.4.xml
@@ -0,0 +1,35426 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key
+ @param newKeys
+ @param customMessage
+ @deprecated use {@link #addDeprecation(String key, String newKey,
+ String customMessage)} instead]]>
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key
+ @param newKey
+ @param customMessage]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKeys list of keys that take up the values of deprecated key
+ @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKey key that takes up the value of deprecated key]]>
+
+
+
+
+
+ key is deprecated.
+
+ @param key the parameter which is to be checked for deprecation
+ @return true if the key is deprecated and
+ false otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param name resource to be added, the classpath is examined for a file
+ with that name.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param url url of the resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param file file-path of resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ WARNING: The contents of the InputStream will be cached, by this method.
+ So use this sparingly because it does increase the memory consumption.
+
+ @param in InputStream to deserialize the object from. In will be read from
+ when a get or set is called next. After it is read the stream will be
+ closed.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param in InputStream to deserialize the object from.
+ @param name the name of the resource because InputStream.toString is not
+ very descriptive sometimes.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param conf Configuration object from which to load properties]]>
+
+
+
+
+
+
+
+
+
+
+ name property, null if
+ no such property exists. If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null.
+
+ Values are processed for variable expansion
+ before being returned.
+
+ @param name the property name, will be trimmed before get value.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property, but only for
+ names which have no valid value, usually non-existent or commented
+ out in XML.
+
+ @param name the property name
+ @return true if the property name exists without value]]>
+
+
+
+
+
+ name property as a trimmed String,
+ null if no such property exists.
+ If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null
+
+ Values are processed for variable expansion
+ before being returned.
+
+ @param name the property name.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+ name property as a trimmed String,
+ defaultValue if no such property exists.
+ See @{Configuration#getTrimmed} for more details.
+
+ @param name the property name.
+ @param defaultValue the property default value.
+ @return the value of the name or defaultValue
+ if it is not set.]]>
+
+
+
+
+
+ name property, without doing
+ variable expansion.If the key is
+ deprecated, it returns the value of the first key which replaces
+ the deprecated key and is not null.
+
+ @param name the property name.
+ @return the value of the name property or
+ its replacing property and null if no such property exists.]]>
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated or there is a deprecated name associated to it,
+ it sets the value to both names. Name will be trimmed before put into
+ configuration.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated, it also sets the value to
+ the keys that replace the deprecated key. Name will be trimmed before put
+ into configuration.
+
+ @param name property name.
+ @param value property value.
+ @param source the place that this configuration value came from
+ (For debugging).
+ @throws IllegalArgumentException when the value or name is null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name. If the key is deprecated,
+ it returns the value of the first key which replaces the deprecated key
+ and is not null.
+ If no such property exists,
+ then defaultValue is returned.
+
+ @param name property name, will be trimmed before get value.
+ @param defaultValue default value.
+ @return property value, or defaultValue if the property
+ doesn't exist.]]>
+
+
+
+
+
+
+ name property as an int.
+
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid int,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as an int,
+ or defaultValue.]]>
+
+
+
+
+
+ name property as a set of comma-delimited
+ int values.
+
+ If no such property exists, an empty array is returned.
+
+ @param name property name
+ @return property value interpreted as an array of comma-delimited
+ int values]]>
+
+
+
+
+
+
+ name property to an int.
+
+ @param name property name.
+ @param value int value of the property.]]>
+
+
+
+
+
+
+ name property as a long.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid long,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a long or
+ human readable format. If no such property exists, the provided default
+ value is returned, or if the specified value is not a valid
+ long or human readable format, then an error is thrown. You
+ can use the following suffix (case insensitive): k(kilo), m(mega), g(giga),
+ t(tera), p(peta), e(exa)
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a long.
+
+ @param name property name.
+ @param value long value of the property.]]>
+
+
+
+
+
+
+ name property as a float.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid float,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a float,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a float.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a double.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid double,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a double,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a double.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a boolean.
+ If no such property is specified, or if the specified value is not a valid
+ boolean, then defaultValue is returned.
+
+ @param name property name.
+ @param defaultValue default value.
+ @return property value as a boolean,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a boolean.
+
+ @param name property name.
+ @param value boolean value of the property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property to the given type. This
+ is equivalent to set(<name>, value.toString()).
+ @param name property name
+ @param value new value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name to the given time duration. This
+ is equivalent to set(<name>, value + <time suffix>).
+ @param name Property name
+ @param value Time duration
+ @param unit Unit of time]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as a Pattern.
+ If no such property is specified, or if the specified value is not a valid
+ Pattern, then DefaultValue is returned.
+ Note that the returned value is NOT trimmed by this method.
+
+ @param name property name
+ @param defaultValue default value
+ @return property value as a compiled Pattern, or defaultValue]]>
+
+
+
+
+
+
+ Pattern.
+ If the pattern is passed as null, sets the empty pattern which results in
+ further calls to getPattern(...) returning the default value.
+
+ @param name property name
+ @param pattern new value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as
+ a collection of Strings.
+ If no such property is specified then empty collection is returned.
+
+ This is an optimized version of {@link #getStrings(String)}
+
+ @param name property name.
+ @return property value as a collection of Strings.]]>
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then null is returned.
+
+ @param name property name.
+ @return property value as an array of Strings,
+ or null.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of Strings,
+ or default value.]]>
+
+
+
+
+
+ name property as
+ a collection of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then empty Collection is returned.
+
+ @param name property name.
+ @return property value as a collection of Strings, or empty Collection]]>
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then an empty array is returned.
+
+ @param name property name.
+ @return property value as an array of trimmed Strings,
+ or empty array.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of trimmed Strings,
+ or default value.]]>
+
+
+
+
+
+
+ name property as
+ comma-delimited values.
+
+ @param name property name.
+ @param values The values]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostProperty as a
+ InetSocketAddress. If hostProperty is
+ null, addressProperty will be used. This
+ is useful for cases where we want to differentiate between host
+ bind address and address clients should use to establish connection.
+
+ @param hostProperty bind host property name.
+ @param addressProperty address property name.
+ @param defaultAddressValue the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+
+ name property as a
+ InetSocketAddress.
+ @param name property name.
+ @param defaultAddress the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+ name property as
+ a host:port.]]>
+
+
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address. If the host and address
+ properties are configured the host component of the address will be combined
+ with the port component of the addr to generate the address. This is to allow
+ optional control over which host name is used in multi-home bind-host
+ cases where a host can have multiple names
+ @param hostProperty the bind-host configuration name
+ @param addressProperty the service address configuration name
+ @param defaultAddressValue the service default address configuration value
+ @param addr InetSocketAddress of the service listener
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address.
+ @param name property name.
+ @param addr InetSocketAddress of a listener to store in the given property
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property
+ as an array of Class.
+ The value of the property specifies a list of comma separated class names.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the property name.
+ @param defaultValue default value.
+ @return property value as a Class[],
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a Class.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the class name.
+ @param defaultValue default value.
+ @return property value as a Class,
+ or defaultValue.]]>
+
+
+
+
+
+
+
+ name property as a Class
+ implementing the interface specified by xface.
+
+ If no such property is specified, then defaultValue is
+ returned.
+
+ An exception is thrown if the returned class does not implement the named
+ interface.
+
+ @param name the class name.
+ @param defaultValue default value.
+ @param xface the interface implemented by the named class.
+ @return property value as a Class,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a List
+ of objects implementing the interface specified by xface.
+
+ An exception is thrown if any of the classes does not exist, or if it does
+ not implement the named interface.
+
+ @param name the property name.
+ @param xface the interface implemented by the classes named by
+ name.
+ @return a List of objects implementing xface.]]>
+
+
+
+
+
+
+
+ name property to the name of a
+ theClass implementing the given interface xface.
+
+ An exception is thrown if theClass does not implement the
+ interface xface.
+
+ @param name property name.
+ @param theClass property value.
+ @param xface the interface implemented by the named class.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.]]>
+
+
+
+
+
+
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return an input stream attached to the resource.]]>
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return a reader attached to the resource.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ String
+ key-value pairs in the configuration.
+
+ @return an iterator over the entries.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When property name is not empty and the property exists in the
+ configuration, this method writes the property and its attributes
+ to the {@link Writer}.
+
+
+
+
+ When property name is null or empty, this method writes all the
+ configuration properties and their attributes to the {@link Writer}.
+
+
+
+
+ When property name is not empty but the property doesn't exist in
+ the configuration, this method throws an {@link IllegalArgumentException}.
+
+
+ @param out the writer to write to.]]>
+
+
+
+
+
+
+
+
+
+ When propertyName is not empty, and the property exists
+ in the configuration, the format of the output would be,
+
+ When propertyName is not empty, and the property is not
+ found in the configuration, this method will throw an
+ {@link IllegalArgumentException}.
+
+
+ @param config the configuration
+ @param propertyName property name
+ @param out the Writer to write to
+ @throws IOException
+ @throws IllegalArgumentException when property name is not
+ empty and the property is not found in configuration]]>
+
+
+
+
+
+
+
+
+ { "properties" :
+ [ { key : "key1",
+ value : "value1",
+ isFinal : "key1.isFinal",
+ resource : "key1.resource" },
+ { key : "key2",
+ value : "value2",
+ isFinal : "ke2.isFinal",
+ resource : "key2.resource" }
+ ]
+ }
+
+
+ It does not output the properties of the configuration object which
+ is loaded from an input stream.
+
+
+ @param config the configuration
+ @param out the Writer to write to
+ @throws IOException]]>
+
Configurations are specified by resources. A resource contains a set of
+ name/value pairs as XML data. Each resource is named by either a
+ String or by a {@link Path}. If named by a String,
+ then the classpath is examined for a file with that name. If named by a
+ Path, then the local filesystem is examined directly, without
+ referring to the classpath.
+
+
Unless explicitly turned off, Hadoop by default specifies two
+ resources, loaded in-order from the classpath:
core-site.xml: Site-specific configuration for a given hadoop
+ installation.
+
+ Applications may add additional resources, which are loaded
+ subsequent to these resources in the order they are added.
+
+
Final Parameters
+
+
Configuration parameters may be declared final.
+ Once a resource declares a value final, no subsequently-loaded
+ resource can alter that value.
+ For example, one might define a final parameter with:
+
When conf.get("tempdir") is called, then ${basedir}
+ will be resolved to another property in this Configuration, while
+ ${user.name} would then ordinarily be resolved to the value
+ of the System property with that name.
+
When conf.get("otherdir") is called, then ${env.BASE_DIR}
+ will be resolved to the value of the ${BASE_DIR} environment variable.
+ It supports ${env.NAME:-default} and ${env.NAME-default} notations.
+ The former is resolved to "default" if ${NAME} environment variable is undefined
+ or its value is empty.
+ The latter behaves the same way only if ${NAME} is undefined.
+
By default, warnings will be given to any deprecated configuration
+ parameters and these are suppressible by configuring
+ log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in
+ log4j.properties file.
+
+
Tags
+
+
Optionally we can tag related properties together by using tag
+ attributes. System tags are defined by hadoop.tags.system property. Users
+ can define their own custom tags in hadoop.tags.custom property.
+
+
Properties marked with tags can be retrieved with conf
+ .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags
+ (Arrays.asList("YARN","SECURITY")).
]]>
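+
+ A brief usage sketch covering resources, variable expansion and typed getters; the
+ resource name and keys are hypothetical:
+
+   Configuration conf = new Configuration();
+   conf.addResource("my-site.xml");             // looked up on the classpath
+   conf.set("basedir", "/tmp/work");
+   // A value containing ${basedir} would expand to /tmp/work when read back.
+   String tempDir = conf.get("tempdir", "/tmp");
+   int handlers = conf.getInt("demo.handler.count", 10);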
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #createKey(String, byte[], Options)} method.
+
+ @param name the base name of the key
+ @param options the options for the new key.
+ @return the version name of the first version of the key.
+ @throws IOException
+ @throws NoSuchAlgorithmException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #rollNewVersion(String, byte[])} method.
+
+ @param name the basename of the key
+ @return the name of the new version of the key
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KeyProvider implementations must be thread safe.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ NULL if
+ a provider for the specified URI scheme could not be found.
+ @throws IOException thrown if the provider failed to initialize.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri has syntax error]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri is
+ not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri
+ determines a configuration property name,
+ fs.AbstractFileSystem.scheme.impl whose value names the
+ AbstractFileSystem class.
+
+ The entire URI and conf is passed to the AbstractFileSystem factory method.
+
+ @param uri for the file system to be created.
+ @param conf which is passed to the file system impl.
+
+ @return file system for the given URI.
+
+ @throws UnsupportedFileSystemException if the file system for
+ uri is not supported.]]>
+
+
+
+
+
+
+
+
+
+
+
+ default port;]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations such as HDFS metadata
+ synchronization is essential to guarantee consistency of read requests
+ particularly in HA setting.
+ @throws IOException
+ @throws UnsupportedOperationException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ describing entries to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications, must include entries
+ for user, group, and others for compatibility with permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ which returns each AclStatus
+ @throws IOException if an ACL could not be read]]>
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ BlockLocation(offset: 0, length: BLOCK_SIZE,
+ hosts: {"host1:9866", "host2:9866, host3:9866"})
+
+
+ And if the file is erasure-coded, each BlockLocation represents a logical
+ block group. Value offset is the offset of a block group in the file and
+ value length is the total length of a block group. Hosts of a BlockLocation
+ are the datanodes that hold all the data blocks and parity blocks of a
+ block group.
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ A BlockLocation example will be like:
+
CREATE - to create a file if it does not exist,
+ else throw FileAlreadyExists.
+
APPEND - to append to a file if it exists,
+ else throw FileNotFoundException.
+
OVERWRITE - to truncate a file if it exists,
+ else throw FileNotFoundException.
+
CREATE|APPEND - to create a file if it does not exist,
+ else append to an existing file.
+
CREATE|OVERWRITE - to create a file if it does not exist,
+ else overwrite an existing file.
+
SYNC_BLOCK - to force closed blocks to the disk device.
+ In addition {@link Syncable#hsync()} should be called after each write,
+ if true synchronous behavior is required.
+
LAZY_PERSIST - Create the block on transient storage (RAM) if
+ available.
+
APPEND_NEWBLOCK - Append data to a new block instead of end of the last
+ partial block.
+
+
+ Following combinations are not valid and will result in
+ {@link HadoopIllegalArgumentException}:
+
+
]]>
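+
+ A hedged sketch of passing a flag combination to FileContext#create; the path is a
+ placeholder and Options.CreateOpts.createParent() is assumed to request parent creation:
+
+   FileContext fc = FileContext.getFileContext();
+   FSDataOutputStream out = fc.create(
+       new Path("/tmp/demo.txt"),
+       EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
+       Options.CreateOpts.createParent());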
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws AccessControlException if access denied
+ @throws IOException If an IO Error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Progress - to report progress on the operation - default null
+
Permission - umask is applied against permission: default is
+ FsPermissions:getDefault()
+
+
CreateParent - create missing parent path; default is to not
+ to create parents
+
The defaults for the following are SS defaults of the file
+ server implementing the target path. Not all parameters make sense
+ for all kinds of file system - eg. localFS ignores Blocksize,
+ replication, checksum
+
+
BufferSize - buffersize used in FSDataOutputStream
+
Blocksize - block size for file blocks
+
ReplicationFactor - replication for blocks
+
ChecksumParam - Checksum parameters. server default is used
+ if not specified.
+
+
+
+ @return {@link FSDataOutputStream} for created file
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file f already exists
+ @throws FileNotFoundException If parent of f does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of f is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dir already
+ exists
+ @throws FileNotFoundException If parent of dir does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of dir is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for dir
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path dir is not valid]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is invalid]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+
+ @throws AccessControlException If access is denied
+ @throws FileNotFoundException If file f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Fails if src is a file and dst is a directory.
+
Fails if src is a directory and dst is a file.
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails if the dst
+ already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites the dst if
+ it is a file or an empty directory. Rename fails if dst is a non-empty
+ directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for details
+
+
+ @param src path to be renamed
+ @param dst new path after rename
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If dst already exists and
+ options has {@link Options.Rename#OVERWRITE}
+ option false.
+ @throws FileNotFoundException If src does not exist
+ @throws ParentNotDirectoryException If parent of dst is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for src
+ and dst is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws HadoopIllegalArgumentException If username or
+ groupname is invalid.]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If the given path does not refer to a symlink
+ or an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Given a path referring to a symlink of form:
+
+ <---X--->
+ fs://host/A/B/link
+ <-----Y----->
+
+ In this path X is the scheme and authority that identify the file system,
+ and Y is the path leading up to the final path component "link". If Y is
+ a symlink itself then let Y' be the target of Y and X' be the scheme and
+ authority of Y'. Symlink targets may be:
+
+ 1. Fully qualified URIs
+
+ fs://hostX/A/B/file Resolved according to the target file system.
+
+ 2. Partially qualified URIs (eg scheme but no host)
+
+ fs:///A/B/file Resolved according to the target file system. Eg resolving
+ a symlink to hdfs:///A results in an exception because
+ HDFS URIs must be fully qualified, while a symlink to
+ file:///A will not since Hadoop's local file systems
+ require partially qualified URIs.
+
+ 3. Relative paths
+
+ path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
+ is "../B/file" then [Y'][path] is hdfs://host/B/file
+
+ 4. Absolute paths
+
+ path Resolves to [X'][path]. Eg if Y resolves to hdfs://host/A/B and path
+ is "/file" then [X'][path] is hdfs://host/file
+
+
+ @param target the target of the symbolic link
+ @param link the path to be created that points to target
+ @param createParent if true then missing parent dirs are created if
+ false then parent must exist
+
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file link already exists
+ @throws FileNotFoundException If target does not exist
+ @throws ParentNotDirectoryException If parent of link is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for
+ target or link is not supported
+ @throws IOException If an I/O error occurred]]>
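+
+ For illustration, a minimal sketch of creating a symlink (paths are
+ hypothetical; the underlying file system must support symlinks):
+
+   import org.apache.hadoop.fs.FileContext;
+   import org.apache.hadoop.fs.Path;
+
+   public class SymlinkExample {
+     public static void main(String[] args) throws Exception {
+       FileContext fc = FileContext.getFileContext();
+       Path target = new Path("/user/alice/data/file"); // hypothetical target
+       Path link = new Path("/user/alice/link");        // hypothetical link
+       // createParent=true creates missing parent directories of the link.
+       fc.createSymlink(target, link, true);
+     }
+   }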
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ describing entries to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications, must include entries
+ for user, group, and others for compatibility with permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ which returns each AclStatus
+ @throws IOException if an ACL could not be read]]>
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List of the XAttr names of the file or directory
+ @throws IOException]]>
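+
+ For illustration, a minimal sketch combining the extended attribute calls
+ above (path is hypothetical; the file system must support xattrs, e.g. HDFS):
+
+   import java.nio.charset.StandardCharsets;
+   import org.apache.hadoop.fs.FileContext;
+   import org.apache.hadoop.fs.Path;
+
+   public class XAttrExample {
+     public static void main(String[] args) throws Exception {
+       FileContext fc = FileContext.getFileContext();
+       Path p = new Path("/user/alice/file");          // hypothetical path
+       // Set, read back and list an attribute in the "user" namespace.
+       fc.setXAttr(p, "user.origin",
+           "ingest-job".getBytes(StandardCharsets.UTF_8));
+       byte[] value = fc.getXAttr(p, "user.origin");
+       System.out.println(new String(value, StandardCharsets.UTF_8));
+       System.out.println(fc.listXAttrs(p));
+     }
+   }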
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Path Names
+
+ The Hadoop file system supports a URI namespace and URI names. This enables
+ multiple types of file systems to be referenced using fully-qualified URIs.
+ Two common Hadoop file system implementations are
+
+
the local file system: file:///path
+
the HDFS file system: hdfs://nnAddress:nnPort/path
+
+
+ The Hadoop file system also supports additional naming schemes besides URIs.
+ Hadoop has the concept of a default file system, which implies a
+ default URI scheme and authority. This enables slash-relative names
+ relative to the default FS, which are more convenient for users and
+ application writers. The default FS is typically set by the user's
+ environment, though it can also be manually specified.
+
+
+ Hadoop also supports working-directory-relative names, which are paths
+ relative to the current working directory (similar to Unix). The working
+ directory can be in a different file system than the default FS.
+
+ Thus, Hadoop path names can be specified as one of the following:
+
+
a fully-qualified URI: scheme://authority/path (e.g.
+ hdfs://nnAddress:nnPort/foo/bar)
+
a slash-relative name: path relative to the default file system (e.g.
+ /foo/bar)
+
a working-directory-relative name: path relative to the working dir (e.g.
+ foo/bar)
+
+ Relative paths with scheme (scheme:foo/bar) are illegal.
+
+
Role of FileContext and Configuration Defaults
+
+ The FileContext is the analogue of per-process file-related state in Unix. It
+ contains two properties:
+
+
+
the default file system (for resolving slash-relative names)
+
the umask (for file permissions)
+
+ In general, these properties are obtained from the default configuration file
+ in the user's environment (see {@link Configuration}).
+
+ Further file system properties are specified on the server-side. File system
+ operations default to using these server-side defaults unless otherwise
+ specified.
+
+ The file system related server-side defaults are:
+
+
the home directory (default is "/user/userName")
+
the initial wd (only for local fs)
+
replication factor
+
block size
+
buffer size
+
encryptDataTransfer
+
checksum option. (checksumType and bytesPerChecksum)
+
+
+
Example Usage
+
+ Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
+ Unspecified values come from core-defaults.xml in the release jar.
+
+
myFContext = FileContext.getFileContext(); // uses the default config
+ // which has your default FS
+
myFContext.create(path, ...);
+
myFContext.setWorkingDir(path);
+
myFContext.open (path, ...);
+
...
+
+ Example 2: Get a FileContext with a specific URI as the default FS
+
+
myFContext = FileContext.getFileContext(URI);
+
myFContext.create(path, ...);
+
...
+
+ Example 3: FileContext with local file system as the default
+
+ If the configuration has the property
+ {@code "fs.$SCHEME.impl.disable.cache"} set to true,
+ a new instance will be created, initialized with the supplied URI and
+ configuration, then returned without being cached.
+
+
+ If there is a cached FS instance matching the same URI, it will
+ be returned.
+
+
+ Otherwise: a new FS instance will be created, initialized with the
+ configuration and URI, cached and returned to the caller.
+
+
+ @throws IOException if the FileSystem cannot be instantiated.]]>
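+
+ The pseudo-code examples above correspond roughly to the following sketch
+ (paths and the HDFS URI are hypothetical):
+
+   import java.net.URI;
+   import java.util.EnumSet;
+   import org.apache.hadoop.fs.CreateFlag;
+   import org.apache.hadoop.fs.FileContext;
+   import org.apache.hadoop.fs.Path;
+
+   public class FileContextExample {
+     public static void main(String[] args) throws Exception {
+       // Example 1: default configuration, hence the default file system.
+       FileContext fc = FileContext.getFileContext();
+       fc.setWorkingDirectory(new Path("/user/alice"));     // hypothetical dir
+       fc.create(new Path("report.txt"),                    // wd-relative name
+           EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE)).close();
+
+       // Example 2: pick the default FS explicitly by URI.
+       FileContext hdfs = FileContext.getFileContext(
+           URI.create("hdfs://namenode:8020"));             // hypothetical URI
+       hdfs.open(new Path("/data/input.txt")).close();      // hypothetical path
+     }
+   }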
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ if f == null :
+ result = null
+ elif f.getLen() <= start:
+ result = []
+ else result = [ locations(FS, b) for b in blocks(FS, f, start, start+len)]
+
+ This call is most helpful with a distributed filesystem
+ where the hostnames of machines that contain blocks of the given file
+ can be determined.
+
+ The default implementation returns an array containing one element:
+
+
+ And if a file is erasure-coded, the returned BlockLocations are logical
+ block groups.
+
+ Suppose we have an RS_3_2 coded file (3 data units and 2 parity units).
+ 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
+ there will be one BlockLocation returned, with 0 offset, actual file size
+ and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks.
+ 2. If the file size is less than one group size but greater than one
+ stripe size, then there will be one BlockLocation returned, with 0 offset,
+ actual file size and 5 hosts (3 data blocks and 2 parity blocks) hosting
+ the actual blocks.
+ 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
+ for example, then the result will be like:
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails
+ if the dst already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites
+ the dst if it is a file or an empty directory. Rename fails if dst is
+ a non-empty directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for
+ details. This default implementation is non atomic.
+
+ This method is deprecated since it is a temporary method added to
+ support the transition from FileSystem to FileContext for user
+ applications.
+
+ @param src path to be renamed
+ @param dst new path after rename
+ @throws FileNotFoundException src path does not exist, or the parent
+ path of dst does not exist.
+ @throws FileAlreadyExistsException dest path exists and is a file
+ @throws ParentNotDirectoryException if the parent path of dest is not
+ a directory
+ @throws IOException on failure]]>
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default).]]>
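+
+ For illustration, a minimal sketch of the truncate contract above (path is
+ hypothetical; the file system must implement truncate, e.g. HDFS):
+
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.fs.FileSystem;
+   import org.apache.hadoop.fs.Path;
+
+   public class TruncateExample {
+     public static void main(String[] args) throws Exception {
+       FileSystem fs = FileSystem.get(new Configuration());
+       Path f = new Path("/data/events.log");       // hypothetical file
+       boolean done = fs.truncate(f, 1024L);        // keep only the first 1 KB
+       if (!done) {
+         // The last block is still being adjusted in the background; wait
+         // before appending or otherwise updating the file.
+         System.out.println("truncate in progress");
+       }
+     }
+   }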
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Clean shutdown of the JVM cannot be guaranteed.
+
The time to shut down a FileSystem depends on the number of
+ files to delete. For filesystems where the cost of checking
+ for the existence of a file/directory and the actual delete operation
+ (for example: object stores) is high, the time to shut down the JVM can be
+ significantly extended by over-use of this feature.
+
Connectivity problems with a remote filesystem may delay shutdown
+ further, and may cause the files to not be deleted.
+
+ @param f the path to delete.
+ @return true if deleteOnExit is successful, otherwise false.
+ @throws IOException IO failure]]>
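+
+ For illustration, a minimal sketch of deleteOnExit for a scratch directory
+ (path is hypothetical); the caveats above apply especially to object stores:
+
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.fs.FileSystem;
+   import org.apache.hadoop.fs.Path;
+
+   public class DeleteOnExitExample {
+     public static void main(String[] args) throws Exception {
+       FileSystem fs = FileSystem.get(new Configuration());
+       Path scratch = new Path("/tmp/job-scratch");   // hypothetical directory
+       fs.mkdirs(scratch);
+       // Marked paths are deleted when the FileSystem is closed or at JVM exit.
+       fs.deleteOnExit(scratch);
+     }
+   }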
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ Will not return null. Expect IOException upon access error.
+ @param f given path
+ @return the statuses of the files/directories in the given path
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param f
+ a path name
+ @param filter
+ the user-supplied path filter
+ @return an array of FileStatus objects for the files under the given path
+ after applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @return a list of statuses for the files under the given paths after
+ applying the filter default Path filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @param filter
+ the user-supplied path filter
+ @return a list of statuses for the files under the given paths after
+ applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+ Return all the files that match filePattern and are not checksum
+ files. Results are sorted by their names.
+
+
+ A filename pattern is composed of regular characters and
+ special pattern matching characters, which are:
+
+
+
+
+
+
?
+
Matches any single character.
+
+
+
*
+
Matches zero or more characters.
+
+
+
[abc]
+
Matches a single character from character set
+ {a,b,c}.
+
+
+
[a-b]
+
Matches a single character from the character range
+ {a...b}. Note that character a must be
+ lexicographically less than or equal to character b.
+
+
+
[^a]
+
Matches a single character that is not from character set or range
+ {a}. Note that the ^ character must occur
+ immediately to the right of the opening bracket.
+
+
+
\c
+
Removes (escapes) any special meaning of character c.
+
+
+
{ab,cd}
+
Matches a string from the string set {ab, cd}
+
+
+
{ab,c{de,fh}}
+
Matches a string from the string set {ab, cde, cfh}
+
+
+
+
+
+ @param pathPattern a glob specifying a path pattern
+
+ @return an array of paths that match the path pattern
+ @throws IOException IO failure]]>
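+
+ For illustration, a minimal sketch exercising the glob syntax above against a
+ hypothetical directory layout:
+
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.fs.FileStatus;
+   import org.apache.hadoop.fs.FileSystem;
+   import org.apache.hadoop.fs.Path;
+
+   public class GlobExample {
+     public static void main(String[] args) throws Exception {
+       FileSystem fs = FileSystem.get(new Configuration());
+       // Matches e.g. /logs/2024-01-01/app.log and /logs/2024-01-02/audit.log
+       FileStatus[] matches = fs.globStatus(
+           new Path("/logs/2024-01-*/{app,audit}.log"));  // hypothetical pattern
+       for (FileStatus st : matches) {
+         System.out.println(st.getPath());
+       }
+     }
+   }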
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+ p does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ If the path is a directory,
+ if recursive is false, returns files in the directory;
+ if recursive is true, return files in the subtree rooted at the path.
+ If the path is a file, return the file's status and block locations.
+
+ @param f is the path
+ @param recursive if the subdirectories need to be traversed recursively
+
+ @return an iterator that traverses statuses of the files
+
+ @throws FileNotFoundException when the path does not exist;
+ @throws IOException see specific implementation]]>
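+
+ For illustration, a minimal sketch of the recursive listing described above
+ (path is hypothetical):
+
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.fs.FileSystem;
+   import org.apache.hadoop.fs.LocatedFileStatus;
+   import org.apache.hadoop.fs.Path;
+   import org.apache.hadoop.fs.RemoteIterator;
+
+   public class ListFilesExample {
+     public static void main(String[] args) throws Exception {
+       FileSystem fs = FileSystem.get(new Configuration());
+       RemoteIterator<LocatedFileStatus> it =
+           fs.listFiles(new Path("/data"), true);  // true = recurse into subdirs
+       while (it.hasNext()) {
+         LocatedFileStatus status = it.next();
+         System.out.println(status.getPath() + " " + status.getLen());
+       }
+     }
+   }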
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ undefined.
+ @throws IOException IO failure]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations, such as HDFS, metadata
+ synchronization is essential to guarantee consistency of read requests,
+ particularly in an HA setting.
+ @throws IOException
+ @throws UnsupportedOperationException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ describing modifications
+ @throws IOException if an ACL could not be modified
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List of the XAttr names of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is a default method which is intended to be overridden by
+ subclasses. The default implementation returns an empty storage statistics
+ object.
+
+ @return The StorageStatistics for this FileSystem instance.
+ Will never be null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ All user code that may potentially use the Hadoop Distributed
+ File System should be written to use a FileSystem object or its
+ successor, {@link FileContext}.
+
+
+ The local implementation is {@link LocalFileSystem} and distributed
+ implementation is DistributedFileSystem. There are other implementations
+ for object stores and (outside the Apache Hadoop codebase),
+ third party filesystems.
+
+ Notes
+
+
The behaviour of the filesystem is
+
+ specified in the Hadoop documentation.
+ However, the normative specification of the behavior of this class is
+ actually HDFS: if HDFS does not behave the way these Javadocs or
+ the specification in the Hadoop documentation defines, assume that
+ the documentation is incorrect.
+
+
The term {@code FileSystem} refers to an instance of this class.
+
The acronym "FS" is used as an abbreviation of FileSystem.
+
The term {@code filesystem} refers to the distributed/local filesystem
+ itself, rather than the class used to interact with it.
+
The term "file" refers to a file in the remote filesystem,
+ rather than instances of {@code java.io.File}.
+ Fencing is configured by the operator as an ordered list of methods to
+ attempt. Each method will be tried in turn, and the next in the list
+ will only be attempted if the previous one fails. See {@link NodeFencer}
+ for more information.
+
+ If an implementation also implements {@link Configurable} then its
+ setConf method will be called upon instantiation.]]>
+
StaticUserWebFilter - An authorization plugin that makes all
+users a static configured user.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ public class IntArrayWritable extends ArrayWritable {
+ public IntArrayWritable() {
+ super(IntWritable.class);
+ }
+ }
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ByteWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to store
+ @param item the object to be stored
+ @param keyName the name of the key to use
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param items the objects to be stored
+ @param keyName the name of the key to use
+ @throws IndexOutOfBoundsException if the items array is empty
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+ DefaultStringifier offers convenience methods to store/load objects to/from
+ the configuration.
+
+ @param the class of the objects to stringify]]>
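+
+ For illustration, a minimal store/load round trip with DefaultStringifier
+ (the configuration key name is arbitrary):
+
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.io.DefaultStringifier;
+   import org.apache.hadoop.io.Text;
+
+   public class StringifierExample {
+     public static void main(String[] args) throws Exception {
+       Configuration conf = new Configuration();
+       // Serialize a Writable into the configuration under an arbitrary key...
+       DefaultStringifier.store(conf, new Text("hello"), "example.stored.text");
+       // ...and restore it later, for example on the task side.
+       Text restored =
+           DefaultStringifier.load(conf, "example.stored.text", Text.class);
+       System.out.println(restored);
+     }
+   }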
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a DoubleWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value argument is null or
+ its size is zero, the elementType argument must not be null. If
+ the argument value's size is bigger than zero, the argument
+ elementType is not used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+ value should not be null
+ or empty.
+
+ @param value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value and elementType. If the value argument
+ is null or its size is zero, the elementType argument must not be
+ null. If the argument value's size is bigger than zero, the
+ argument elementType is not used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is an EnumSetWritable with the same value,
+ or both are null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a FloatWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When two sequence files, which have the same Key type but different Value
+ types, are mapped out to reduce, multiple Value types are not allowed.
+ In this case, this class can help you wrap instances with different types.
+
+
+
+ Compared with ObjectWritable, this class is much more efficient,
+ because ObjectWritable will append the class declaration as a String
+ to the output file in every Key-Value pair.
+
+
+
+ Generic Writable implements {@link Configurable} interface, so that it will be
+ configured by the framework. The configuration is passed to the wrapped objects
+ implementing {@link Configurable} interface before deserialization.
+
+
+ How to use it:
+ 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines
+ the classes that will be wrapped in GenericObject in the application
+ (see the sketch below). Attention: the classes defined in the getTypes()
+ method must implement the Writable interface.
+
+
+ @since Nov 8, 2006]]>
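+
+ Following the two steps above, a minimal GenericWritable subclass might look
+ like this (class and type choices are illustrative):
+
+   import org.apache.hadoop.io.GenericWritable;
+   import org.apache.hadoop.io.IntWritable;
+   import org.apache.hadoop.io.Text;
+   import org.apache.hadoop.io.Writable;
+
+   // Wraps either a Text or an IntWritable value under one declared type.
+   public class GenericObject extends GenericWritable {
+     @SuppressWarnings("unchecked")
+     private static final Class<? extends Writable>[] TYPES =
+         new Class[] { Text.class, IntWritable.class };
+
+     @Override
+     protected Class<? extends Writable>[] getTypes() {
+       return TYPES;
+     }
+   }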
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a IntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ closes the input and output streams
+ at the end.
+
+ @param in InputStream to read from
+ @param out OutputStream to write to
+ @param conf the Configuration object]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param log the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close
+ @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)}
+ instead]]>
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param logger the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is better than File#listDir because it does not ignore IOExceptions.
+
+ @param dir The directory to list.
+ @param filter If non-null, the filter to use when listing
+ this directory.
+ @return The list of files in the directory.
+
+ @throws IOException On I/O error]]>
+
+
+
+
+
+
+
+ Borrowed from Uwe Schindler in LUCENE-5588
+ @param fileToSync the file to fsync]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a LongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A map is a directory containing two files, the data file,
+ containing all keys and values in the map, and a smaller index
+ file, containing a fraction of the keys. The fraction is determined by
+ {@link Writer#getIndexInterval()}.
+
+
The index file is read entirely into memory. Thus key implementations
+ should try to keep themselves small.
+
+
Map files are created by adding entries in-order. To maintain a large
+ database, perform updates by copying the previous version of a database and
+ merging in a sorted change list, to create a new version of the database in
+ a new file. Sorting large change lists can be done with {@link
+ SequenceFile.Sorter}.]]>
+
SequenceFile provides {@link SequenceFile.Writer},
+ {@link SequenceFile.Reader} and {@link Sorter} classes for writing,
+ reading and sorting respectively.
+
+ There are three SequenceFileWriters based on the
+ {@link CompressionType} used to compress key/value pairs:
+
+
+ Writer : Uncompressed records.
+
+
+ RecordCompressWriter : Record-compressed files, only compress
+ values.
+
+
+ BlockCompressWriter : Block-compressed files, both keys &
+ values are collected in 'blocks'
+ separately and compressed. The size of
+ the 'block' is configurable.
+
+
+
The actual compression algorithm used to compress key and/or values can be
+ specified by using the appropriate {@link CompressionCodec}.
+
+
The recommended way is to use the static createWriter methods
+ provided by the SequenceFile to choose the preferred format.
+
+
The {@link SequenceFile.Reader} acts as the bridge and can read any of the
+ above SequenceFile formats.
+
+
SequenceFile Formats
+
+
Essentially there are 3 different formats for SequenceFiles
+ depending on the CompressionType specified. All of them share a
+ common header described below.
+
+
SequenceFile Header
+
+
+ version - 3 bytes of magic header SEQ, followed by 1 byte of actual
+ version number (e.g. SEQ4 or SEQ6)
+
+
+ keyClassName - key class
+
+
+ valueClassName - value class
+
+
+ compression - A boolean which specifies if compression is turned on for
+ keys/values in this file.
+
+
+ blockCompression - A boolean which specifies if block-compression is
+ turned on for keys/values in this file.
+
+
+ compression codec - CompressionCodec class which is used for
+ compression of keys and/or values (if compression is
+ enabled).
+
+
+ metadata - {@link Metadata} for this file.
+
+
+ sync - A sync marker to denote end of the header.
+
The compressed blocks of key lengths and value lengths consist of the
+ actual lengths of individual keys/values encoded in ZeroCompressedInteger
+ format.
+
+ @see CompressionCodec]]>
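+
+ For illustration, a minimal sketch that writes a block-compressed
+ SequenceFile via the static createWriter factory (output path is hypothetical):
+
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.fs.Path;
+   import org.apache.hadoop.io.IntWritable;
+   import org.apache.hadoop.io.SequenceFile;
+   import org.apache.hadoop.io.Text;
+
+   public class SequenceFileExample {
+     public static void main(String[] args) throws Exception {
+       Configuration conf = new Configuration();
+       Path file = new Path("/tmp/example.seq");      // hypothetical output
+       try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
+           SequenceFile.Writer.file(file),
+           SequenceFile.Writer.keyClass(Text.class),
+           SequenceFile.Writer.valueClass(IntWritable.class),
+           SequenceFile.Writer.compression(SequenceFile.CompressionType.BLOCK))) {
+         writer.append(new Text("alpha"), new IntWritable(1));
+         writer.append(new Text("beta"), new IntWritable(2));
+       }
+     }
+   }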
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ShortWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the objects to stringify]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ position. Note that this
+ method avoids using the converter or doing String instantiation
+ @return the Unicode scalar value at position or -1
+ if the position is invalid or points to a
+ trailing byte]]>
+
+
+
+
+
+
+
+
+
+ what in the backing
+ buffer, starting as position start. The starting
+ position is measured in bytes and the return value is in
+ terms of byte position in the buffer. The backing buffer is
+ not converted to a string for this operation.
+ @return byte position of the first occurrence of the search
+ string in the UTF-8 buffer or -1 if not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Note: For performance reasons, this call does not clear the
+ underlying byte array that is retrievable via {@link #getBytes()}.
+ In order to free the byte-array memory, call {@link #set(byte[])}
+ with an empty byte array (For example, new byte[0]).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a Text with the same contents.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.
+ @return ByteBuffer: bytes stores at ByteBuffer.array()
+ and length is ByteBuffer.limit()]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In
+ addition, it provides methods for string traversal without converting the
+ byte array to a string.
Also includes utilities for
+ serializing/deserializing a string, coding/decoding a string, checking if a
+ byte array contains valid UTF8 code, calculating the length of an encoded
+ string.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is useful when a class may evolve, so that instances written by the
+ old version of the class may still be processed by the new version. To
+ handle this situation, {@link #readFields(DataInput)}
+ implementations should catch {@link VersionMismatchException}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VIntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VLongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ out.
+
+ @param out DataOutput to serialize this object into.
+ @throws IOException]]>
+
+
+
+
+
+
+ in.
+
+
For efficiency, implementations should attempt to re-use storage in the
+ existing object where possible.
+
+ @param in DataInput to deserialize this object from.
+ @throws IOException]]>
+
+
+
+ Any key or value type in the Hadoop Map-Reduce
+ framework implements this interface.
+
+
Implementations typically implement a static read(DataInput)
+ method which constructs a new instance, calls {@link #readFields(DataInput)}
+ and returns the instance.
+
+
Example:
+
+ public class MyWritable implements Writable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ // Default constructor to allow (de)serialization
+ MyWritable() { }
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public static MyWritable read(DataInput in) throws IOException {
+ MyWritable w = new MyWritable();
+ w.readFields(in);
+ return w;
+ }
+ }
+
]]>
+
+
+
+
+
+
+
+
+ WritableComparables can be compared to each other, typically
+ via Comparators. Any type which is to be used as a
+ key in the Hadoop Map-Reduce framework should implement this
+ interface.
+
+
Note that hashCode() is frequently used in Hadoop to partition
+ keys. It's important that your implementation of hashCode() returns the same
+ result across different instances of the JVM. Note also that the default
+ hashCode() implementation in Object does not
+ satisfy this property.
+
+
Example:
+
+ public class MyWritableComparable implements WritableComparable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public int compareTo(MyWritableComparable o) {
+ int thisValue = this.counter;
+ int thatValue = o.counter;
+ return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
+ }
+
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + counter;
+ result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
+ return result;
+ }
+ }
+
One may optimize compare-intensive operations by overriding
+ {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are
+ provided to assist in optimized implementations of this method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Enum type
+ @param in DataInput to read from
+ @param enumType Class type of Enum
+ @return Enum represented by String read from DataInput
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ len number of bytes in input stream in
+ @param in input stream
+ @param len number of bytes to skip
+ @throws IOException when fewer bytes are skipped]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CompressionCodec for which to get the
+ Compressor
+ @param conf the Configuration object which contains confs for creating or reinit the compressor
+ @return Compressor for the given
+ CompressionCodec from the pool or a new one]]>
+
+
+
+
+
+
+
+
+ CompressionCodec for which to get the
+ Decompressor
+ @return Decompressor for the given
+ CompressionCodec the pool or a new one]]>
+
+
+
+
+
+ Compressor to be returned to the pool]]>
+
+
+
+
+
+ Decompressor to be returned to the
+ pool]]>
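+
+ For illustration, a minimal sketch of borrowing and returning a compressor
+ (the codec choice is illustrative):
+
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.io.compress.CodecPool;
+   import org.apache.hadoop.io.compress.CompressionCodec;
+   import org.apache.hadoop.io.compress.Compressor;
+   import org.apache.hadoop.io.compress.GzipCodec;
+   import org.apache.hadoop.util.ReflectionUtils;
+
+   public class CodecPoolExample {
+     public static void main(String[] args) {
+       Configuration conf = new Configuration();
+       CompressionCodec codec = ReflectionUtils.newInstance(GzipCodec.class, conf);
+       Compressor compressor = CodecPool.getCompressor(codec, conf);
+       try {
+         // Use codec.createOutputStream(out, compressor) to write compressed data.
+       } finally {
+         CodecPool.returnCompressor(compressor);  // always return it to the pool
+       }
+     }
+   }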
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Codec aliases are case insensitive.
+
+ The codec alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec object]]>
+
+
+
+
+
+
+ Codec aliases are case insensitive.
+
+ The codec alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec class]]>
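+
+ For illustration, a minimal sketch of resolving a codec by its alias:
+
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.io.compress.CompressionCodec;
+   import org.apache.hadoop.io.compress.CompressionCodecFactory;
+
+   public class CodecAliasExample {
+     public static void main(String[] args) {
+       CompressionCodecFactory factory =
+           new CompressionCodecFactory(new Configuration());
+       // "gzip" and "gzipcodec" both resolve to GzipCodec; lookup is case insensitive.
+       CompressionCodec codec = factory.getCodecByName("gzip");
+       System.out.println(codec.getClass().getName());
+     }
+   }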
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Implementations are assumed to be buffered. This permits clients to
+ reposition the underlying input stream then call {@link #resetState()},
+ without having to also synchronize client buffers.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ #setInput() should be called in order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if the end of the compressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+ (Both native and non-native versions of various Decompressors require
+ that the data passed in via b[] remain unmodified until
+ the caller is explicitly notified--via {@link #needsInput()}--that the
+ buffer may be safely modified. With this requirement, an extra
+ buffer-copy can be avoided.)
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called to
+ provide more input.
+
+ @return true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called in
+ order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ true if a preset dictionary is needed for decompression.
+ @return true if a preset dictionary is needed for decompression]]>
+
+
+
+
+ true if the end of the decompressed
+ data output stream has been reached. Indicates a concatenated data stream
+ when finished() returns true and {@link #getRemaining()}
+ returns a positive value. finished() will be reset with the
+ {@link #reset()} method.
+ @return true if the end of the decompressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true and getRemaining() returns a positive value. If
+ {@link #finished()} returns true and getRemaining() returns
+ a zero value, indicates that the end of data stream has been reached and
+ is not a concatenated data stream.
+ @return The number of bytes remaining in the compressed data buffer.]]>
+
+
+
+
+ true and {@link #getRemaining()} returns a positive value,
+ reset() is called before processing of the next data stream in the
+ concatenated data stream. {@link #finished()} will be reset and will
+ return false when reset() is called.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Seek by key or by file offset.
+
+ The memory footprint of a TFile includes the following:
+
+
Some constant overhead of reading or writing a compressed block.
+
+
Each compressed block requires one compression/decompression codec for
+ I/O.
+
Temporary space to buffer the key.
+
Temporary space to buffer the value (for TFile.Writer only). Values are
+ chunk encoded, so that we buffer at most one chunk of user data. By default,
+ the chunk buffer is 1MB. Reading chunked value does not require additional
+ memory.
+
+
TFile index, which is proportional to the total number of Data Blocks.
+ The total amount of memory needed to hold the index can be estimated as
+ (56+AvgKeySize)*NumBlocks.
+
MetaBlock index, which is proportional to the total number of Meta
+ Blocks. The total amount of memory needed to hold the index for Meta Blocks
+ can be estimated as (40+AvgMetaBlockName)*NumMetaBlock.
+
+
+ The behavior of TFile can be customized by the following variables through
+ Configuration:
+
+
tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default
+ to 1MB. Values with a length less than the chunk size are guaranteed to have
+ a known value length at read time (See
+ {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}).
+
tfile.fs.output.buffer.size: Buffer size used for
+ FSDataOutputStream. Integer (in bytes). Default to 256KB.
+
tfile.fs.input.buffer.size: Buffer size used for
+ FSDataInputStream. Integer (in bytes). Default to 256KB.
+
+
+ Suggestions on performance optimization.
+
+
Minimum block size. We recommend a minimum block size between
+ 256KB and 1MB for general usage. A larger block size is preferred if files are
+ primarily for sequential access. However, it would lead to inefficient random
+ access (because there is more data to decompress). Smaller blocks are good
+ for random access, but require more memory to hold the block index, and may
+ be slower to create (because we must flush the compressor stream at the
+ conclusion of each data block, which leads to an FS I/O flush). Further, due
+ to the internal caching in the compression codec, the smallest practical block
+ size would be around 20KB-30KB.
+
The current implementation does not offer true multi-threading for
+ reading. The implementation uses FSDataInputStream seek()+read(), which is
+ shown to be much faster than positioned-read call in single thread mode.
+ However, it also means that if multiple threads attempt to access the same
+ TFile (using multiple scanners) simultaneously, the actual I/O is carried out
+ sequentially even if they access different DFS blocks.
+
Compression codec. Use "none" if the data is not very compressable (by
+ compressable, I mean a compression ratio at least 2:1). Generally, use "lzo"
+ as the starting point for experimenting. "gz" overs slightly better
+ compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to
+ decompress, comparing to "lzo".
+
File system buffering. If the underlying FSDataInputStream and
+ FSDataOutputStream are already adequately buffered, or if applications
+ read/write keys and values in large buffers, we can reduce the sizes of
+ input/output buffering in the TFile layer by setting the configuration parameters
+ "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size".
+
+
+ Some design rationale behind TFile can be found at Hadoop-3315.]]>
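+
+ The tunables listed above are ordinary Configuration keys; for instance
+ (values are illustrative):
+
+   import org.apache.hadoop.conf.Configuration;
+
+   public class TFileTuningExample {
+     public static void main(String[] args) {
+       Configuration conf = new Configuration();
+       conf.setInt("tfile.io.chunk.size", 1024 * 1024);        // 1MB value chunks
+       conf.setInt("tfile.fs.output.buffer.size", 256 * 1024); // 256KB write buffer
+       conf.setInt("tfile.fs.input.buffer.size", 256 * 1024);  // 256KB read buffer
+       // Pass this conf to TFile.Writer / TFile.Reader when constructing them.
+     }
+   }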
+
+
+
+
+
+
+
+
+
+
+ Utils#writeVLong(out, n).
+
+ @param out
+ output stream
+ @param n
+ The integer to be encoded
+ @throws IOException
+ @see Utils#writeVLong(DataOutput, long)]]>
+
+
+
+
+
+
+
+
+
if n in [-32, 127): encode in one byte with the actual value.
+ Otherwise,
+
if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52;
+ byte[1]=n&0xff. Otherwise,
+
if n IN [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 -
+ 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
+
if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112;
+ byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise:
+
if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] =
+ (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
+
if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] =
+ (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff;
+ byte[4]=(n>>8)&0xff; byte[5]=n&0xff
+
if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] =
+ (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff;
+ byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
+
if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] =
+ (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff;
+ byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff;
+ byte[7]=n&0xff;
+
if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] =
+ (n>>54)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff;
+ byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff;
+ byte[7]=(n>>8)&0xff; byte[8]=n&0xff;
+
+
+ @param out
+ output stream
+ @param n
+ the integer number
+ @throws IOException]]>
+
if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
+
if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 +
+ NB[1]&0xff;
+
if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 +
+ (NB[1]&0xff)<<8 + NB[2]&0xff;
+
if (FB in [-128, -121]), return interpret NB[FB+129] as a signed
+ big-endian integer.
+
+ @param in
+ input stream
+ @return the decoded long integer.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ An experimental {@link Serialization} for Java {@link Serializable} classes.
+
+ @see JavaSerializationComparator]]>
+
+
+
+
+
+
+
+
+
+
+ A {@link RawComparator} that uses a {@link JavaSerialization}
+ {@link Deserializer} to deserialize objects that are then compared via
+ their {@link Comparable} interfaces.
+
+ @param
+ @see JavaSerialization]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides a mechanism for using different serialization frameworks
+in Hadoop. The property "io.serializations" defines a list of
+{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create
+{@link org.apache.hadoop.io.serializer.Serializer}s and
+{@link org.apache.hadoop.io.serializer.Deserializer}s.
+
+
+
+To add a new serialization framework write an implementation of
+{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the
+"io.serializations" property.
+
]]>
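+
+ Registering an additional serialization framework is a configuration change;
+ for instance (the custom class name is hypothetical):
+
+   import org.apache.hadoop.conf.Configuration;
+
+   public class SerializationConfigExample {
+     public static void main(String[] args) {
+       Configuration conf = new Configuration();
+       // Keep the built-in WritableSerialization and append a custom framework.
+       conf.setStrings("io.serializations",
+           "org.apache.hadoop.io.serializer.WritableSerialization",
+           "com.example.MySerialization");      // hypothetical implementation
+     }
+   }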
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ avro.reflect.pkgs or implement
+ {@link AvroReflectSerializable} interface.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides Avro serialization in Hadoop. This can be used to
+serialize/deserialize Avro types in Hadoop.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for
+serialization of classes generated by Avro's 'specific' compiler.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for
+other classes.
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} work for
+any class which is either in the package list configured via
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES}
+or implement {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable}
+interface.
+
{@link MetricsSource} generate and update metrics information.
+
{@link MetricsSink} consume the metrics information
+
+
+ {@link MetricsSource} and {@link MetricsSink} register with the metrics
+ system. Implementations of {@link MetricsSystem} polls the
+ {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to
+ {@link MetricsSink}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (aggregate).
+ Filter out entries that don't have at least minSamples.
+
+ @return a map of peer DataNode Id to the average latency to that
+ node seen over the measurement period.]]>
+
+
+
+
+
+
+
+
+
+
+ This class maintains a group of rolling average metrics. It implements the
+ algorithm of rolling average, i.e. a number of sliding windows are kept to
+ roll over and evict old subsets of samples. Each window has a subset of
+ samples in a stream, where sub-sum and sub-total are collected. All sub-sums
+ and sub-totals in all windows will be aggregated into a final sum and final total
+ used to compute the final average, which is called the rolling average.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This class is a metrics sink that uses
+ {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every
+ roll interval a new directory will be created under the path specified by the
+ basepath property. All metrics will be logged to a file in the
+ current interval's directory in a file named <hostname>.log, where
+ <hostname> is the name of the host on which the metrics logging
+ process is running. The base path is set by the
+ <prefix>.sink.<instance>.basepath property. The
+ time zone used to create the current interval's directory name is GMT. If
+ the basepath property isn't specified, it will default to
+ "/tmp", which is the temp directory on whatever default file
+ system is configured for the cluster.
+
+
The <prefix>.sink.<instance>.ignore-error
+ property controls whether an exception is thrown when an error is encountered
+ writing a log file. The default value is true. When set to
+ false, file errors are quietly swallowed.
+
+
The roll-interval property sets the amount of time before
+ rolling the directory. The default value is 1 hour. The roll interval may
+ not be less than 1 minute. The property's value should be given as
+ number unit, where number is an integer value, and
+ unit is a valid unit. Valid units are minute, hour,
+ and day. The units are case insensitive and may be abbreviated or
+ plural. If no units are specified, hours are assumed. For example,
+ "2", "2h", "2 hour", and
+ "2 hours" are all valid ways to specify two hours.
+
+
The roll-offset-interval-millis property sets the upper
+ bound on a random time interval (in milliseconds) that is used to delay
+ before the initial roll. All subsequent rolls will happen an integer
+ number of roll intervals after the initial roll, hence retaining the original
+ offset. The purpose of this property is to insert some variance in the roll
+ times so that large clusters using this sink on every node don't cause a
+ performance impact on HDFS by rolling simultaneously. The default value is
+ 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in
+ millis should be no less than the number of sink instances times 5.
+
+
The primary use of this class is for logging to HDFS. As it uses
+ {@link org.apache.hadoop.fs.FileSystem} to access the target file system,
+ however, it can be used to write to the local file system, Amazon S3, or any
+ other supported file system. The base path for the sink will determine the
+ file system used. An unqualified path will write to the default file system
+ set by the configuration.
+
+
Not all file systems support the ability to append to files. In file
+ systems without the ability to append to files, only one writer can write to
+ a file at a time. To allow for concurrent writes from multiple daemons on a
+ single host, the source property is used to set unique headers
+ for the log files. The property should be set to the name of
+ the source daemon, e.g. namenode. The value of the
+ source property should typically be the same as the property's
+ prefix. If this property is not set, the source is taken to be
+ unknown.
+
+
Instead of appending to an existing file, by default the sink
+ will create a new file with a suffix of ".<n>", where
+ n is the next lowest integer that isn't already used in a file name,
+ similar to the Hadoop daemon logs. NOTE: the file with the highest
+ sequence number is the newest file, unlike the Hadoop daemon logs.
+
+
For file systems that allow append, the sink supports appending to the
+ existing file instead. If the allow-append property is set to
+ true, the sink will instead append to the existing file on file systems that
+ support appends. By default, the allow-append property is
+ false.
+
+
Note that when writing to HDFS with allow-append set to true,
+ there is a minimum acceptable number of data nodes. If the number of data
+ nodes drops below that minimum, the append will succeed, but reading the
+ data will fail with an IOException in the DataStreamer class. The minimum
+ number of data nodes required for a successful append is generally 2 or
+ 3.
+
+
Note also that when writing to HDFS, the file size information is not
+ updated until the file is closed (at the end of the interval) even though
+ the data is being written successfully. This is a known HDFS limitation that
+ exists because of the performance cost of updating the metadata. See
+ HDFS-5478.
+
+
When using this sink in a secure (Kerberos) environment, two additional
+ properties must be set: keytab-key and
+ principal-key. keytab-key should contain the key by
+ which the keytab file can be found in the configuration, for example,
+ yarn.nodemanager.keytab. principal-key should
+ contain the key by which the principal can be found in the configuration,
+ for example, yarn.nodemanager.principal.]]>
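+
+ As a hedged illustration (not part of the original description), a typical
+ hadoop-metrics2.properties configuration for this sink might look like the
+ following; the sink class name, instance name and base path are assumed
+ values, while the property names come from the description above:
+
+   namenode.sink.hdfs.class=org.apache.hadoop.metrics2.sink.RollingFileSystemSink
+   namenode.sink.hdfs.basepath=hdfs://nameservice/tmp/namenode-metrics
+   namenode.sink.hdfs.roll-interval=1h
+   namenode.sink.hdfs.source=namenode
+   namenode.sink.hdfs.allow-append=true
+   namenode.sink.hdfs.ignore-error=false
+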
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CollectD StatsD plugin).
+
+ To configure this plugin, you will need to add the following
+ entries to your hadoop-metrics2.properties file:
+
+
+ *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
+ [prefix].sink.statsd.server.host=
+ [prefix].sink.statsd.server.port=
+ [prefix].sink.statsd.skip.hostname=true|false (optional)
+ [prefix].sink.statsd.service.name=NameNode (name you want for service)
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ "hadoop:service=<serviceName>,name=<nameName>"
+ Where <serviceName> and <nameName> are the supplied parameters.
+
+ @param serviceName
+ @param nameName
+ @param theMbean - the MBean to register
+ @return the name used to register the MBean]]>
+
+
+
+
+
+
+
+
+ "hadoop:service=<serviceName>,name=<nameName>"
+ Where <serviceName> and <nameName> are the supplied parameters.
+
+ @param serviceName
+ @param nameName
+ @param properties - Key value pairs to define additional JMX ObjectName
+ properties.
+ @param theMbean - the MBean to register
+ @return the name used to register the MBean]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostname or hostname:port. If
+ the specs string is null, defaults to localhost:defaultPort.
+
+ @param specs server specs (see description)
+ @param defaultPort the default port if not specified
+ @return a list of InetSocketAddress objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is used when parts of Hadoop need to know whether to apply
+ single rack vs multi-rack policies, such as during block placement.
+ Such algorithms behave differently if they are on multi-switch systems.
+
+
+ @return true if the mapping thinks that it is on a single switch]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This predicate simply assumes that all mappings not derived from
+ this class are multi-switch.
+ @param mapping the mapping to query
+ @return true if the base class says it is single switch, or the mapping
+ is not derived from this class.]]>
+
+
+
+ It is not mandatory to
+ derive {@link DNSToSwitchMapping} implementations from it, but it is strongly
+ recommended, as it makes it easy for the Hadoop developers to add new methods
+ to this base class that are automatically picked up by all implementations.
+
+
+ This class does not extend the Configured
+ base class, and should not be changed to do so, as it causes problems
+ for subclasses. The constructor of the Configured calls
+ the {@link #setConf(Configuration)} method, which will call into the
+ subclasses before they have been fully constructed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ If a name cannot be resolved to a rack, the implementation
+ should return {@link NetworkTopology#DEFAULT_RACK}. This
+ is what the bundled implementations do, though it is not a formal requirement
+
+ @param names the list of hosts to resolve (can be empty)
+ @return list of resolved network paths.
+ If names is empty, the returned list is also empty]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Calling {@link #setConf(Configuration)} will trigger a
+ re-evaluation of the configuration settings and so can be used to
+ set up the mapping script.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will get called in the superclass constructor, so a check is needed
+ to ensure that the raw mapping is defined before trying to relay a null
+ configuration.
+ @param conf]]>
+
+
+
+
+
+
+
+
+
+ It contains a static class RawScriptBasedMapping that performs
+ the work: reading the configuration parameters, executing any defined
+ script, handling errors and such like. The outer
+ class extends {@link CachedDNSToSwitchMapping} to cache the delegated
+ queries.
+
+ This DNS mapper's {@link #isSingleSwitch()} predicate returns
+ true if and only if a script is defined.]]>
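+
+ A hedged configuration sketch (not from this file): the property
+ net.topology.script.file.name selects the script and
+ net.topology.node.switch.mapping.impl selects this mapping class; the script
+ path is a placeholder.
+
+   Configuration conf = new Configuration();
+   conf.set("net.topology.script.file.name", "/etc/hadoop/conf/topology.sh");
+   conf.setClass("net.topology.node.switch.mapping.impl",
+       ScriptBasedMapping.class, DNSToSwitchMapping.class);
+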
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text
+ file. The columns are separated by whitespace. The first column is a DNS or
+ IP address and the second column specifies the rack to which the address maps.
+
+
+ This class uses the configuration parameter {@code
+ net.topology.table.file.name} to locate the mapping file.
+
+
+ Calls to {@link #resolve(List)} will look up the address as defined in the
+ mapping file. If no entry corresponding to the address is found, the value
+ {@code /default-rack} is returned.
+
]]>
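+
+ A hedged illustration (not from this file) of the two-column file plus the
+ configuration that selects TableMapping; hosts, racks and paths are
+ placeholders:
+
+   // topology.table (whitespace-separated: host or IP, then rack)
+   //   192.168.1.10            /rack1
+   //   worker-2.example.com    /rack2
+   Configuration conf = new Configuration();
+   conf.set("net.topology.table.file.name", "/etc/hadoop/conf/topology.table");
+   conf.setClass("net.topology.node.switch.mapping.impl",
+       TableMapping.class, DNSToSwitchMapping.class);
+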
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (cause==null ? null : cause.toString()) (which
+ typically contains the class and detail message of cause).
+ @param cause the cause (which is saved for later retrieval by the
+ {@link #getCause()} method). (A null value is
+ permitted, and indicates that the cause is nonexistent or
+ unknown.)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ mapping
+ and mapping]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ /host@realm.
+ @param principalName principal name of format as described above
+ @return host name if the string conforms to the above format, else null]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ "jack"
+
+ @param userName
+ @return userName without login method]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the return type of the run method
+ @param action the method to execute
+ @return the value from the run method]]>
+
+
+
+
+
+
+
+ the return type of the run method
+ @param action the method to execute
+ @return the value from the run method
+ @throws IOException if the action throws an IOException
+ @throws Error if the action throws an Error
+ @throws RuntimeException if the action throws a RuntimeException
+ @throws InterruptedException if the action throws an InterruptedException
+ @throws UndeclaredThrowableException if the action throws something else]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CredentialProvider implementations must be thread safe.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ (cause==null ? null : cause.toString()) (which
+ typically contains the class and detail message of cause).
+ @param cause the cause (which is saved for later retrieval by the
+ {@link #getCause()} method). (A null value is
+ permitted, and indicates that the cause is nonexistent or
+ unknown.)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ does not provide the stack trace for security purposes.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A User-Agent String is considered to be a browser if it matches
+ any of the regex patterns from browser-useragent-regex; the default
+ behavior is to consider everything a browser that matches the following:
+ "^Mozilla.*,^Opera.*". Subclasses can optionally override
+ this method to use different behavior.
+
+ @param userAgent The User-Agent String, or null if there isn't one
+ @return true if the User-Agent String refers to a browser, false if not]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The type of the token identifier]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ T extends TokenIdentifier]]>
+
+
+
+
+
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ An instance of the default {@link DelegationTokenAuthenticator} will be
+ used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL using the default
+ {@link DelegationTokenAuthenticator} class.
+
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+
+
+
+
+
+
+
+ The default class is {@link KerberosDelegationTokenAuthenticator}
+
+ @return the delegation token authenticator class to use as default.]]>
+
+
+
+
+
+
+ This method is provided to enable WebHDFS backwards compatibility.
+
+ @param useQueryString TRUE if the token is transmitted in the
+ URL query string, FALSE if the delegation token is transmitted
+ using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP
+ header.]]>
+
+
+
+
+ TRUE if the token is transmitted in the URL query
+ string, FALSE if the delegation token is transmitted using the
+ {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator. If the doAs parameter is not NULL,
+ the request will be done on behalf of the specified doAs user.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @param doAs user to do the the request on behalf of, if NULL the request is
+ as self.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+ DelegationTokenAuthenticatedURL is a
+ {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token
+ functionality.
+
+ The authentication mechanisms supported by default are Hadoop Simple
+ authentication (also known as pseudo authentication) and Kerberos SPNEGO
+ authentication.
+
+ Additional authentication mechanisms can be supported via {@link
+ DelegationTokenAuthenticator} implementations.
+
+ The default {@link DelegationTokenAuthenticator} is the {@link
+ KerberosDelegationTokenAuthenticator} class which supports
+ automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via
+ the {@link PseudoDelegationTokenAuthenticator} class.
+
+ AuthenticatedURL instances are not thread-safe.]]>
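+
+ A hedged usage sketch (not from this file); the endpoint URL and renewer are
+ placeholders and checked exception handling is omitted for brevity:
+
+   DelegationTokenAuthenticatedURL.Token token =
+       new DelegationTokenAuthenticatedURL.Token();
+   DelegationTokenAuthenticatedURL authUrl = new DelegationTokenAuthenticatedURL();
+   URL url = new URL("http://service.example.com:14000/webhdfs/v1/?op=LISTSTATUS");
+   // Fetch a delegation token for the current user, then open an
+   // authenticated connection that transmits it.
+   authUrl.getDelegationToken(url, token,
+       UserGroupInformation.getCurrentUser().getShortUserName());
+   HttpURLConnection conn = authUrl.openConnection(url, token);
+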
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KerberosDelegationTokenAuthenticator provides support for
+ Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation
+ Token operations.
+
+ It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP
+ endpoint does not trigger a SPNEGO authentication]]>
+
+
+
+
+
+
+
+
+ PseudoDelegationTokenAuthenticator provides support for
+ Hadoop's pseudo authentication mechanism that accepts
+ the user name specified as a query string parameter and support for Hadoop
+ Delegation Token operations.
+
+ This mimics the model of Hadoop Simple authentication trusting the
+ {@link UserGroupInformation#getCurrentUser()} value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ live.
+ @return a (snapshotted) map of blocker name->description values]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ Do nothing if the service is null or not
+ in a state in which it can be/needs to be stopped.
+
+ The service state is checked before the operation begins.
+ This process is not thread safe.
+ @param service a service or null]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Any long-lived operation here will prevent the service state
+ change from completing in a timely manner.
+
If another thread is somehow invoked from the listener, and
+ that thread invokes the methods of the service (including
+ subclass-specific methods), there is a risk of a deadlock.
+
+
+
+ @param service the service that has changed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The base implementation logs all arguments at the debug level,
+ then returns the passed in config unchanged.]]>
+
+
+
+
+
+
+ The action is to signal success by returning the exit code 0.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is called before {@link #init(Configuration)};
+ Any non-null configuration that is returned from this operation
+ becomes the one that is passed on to that {@link #init(Configuration)}
+ operation.
+
+ This permits implementations to change the configuration before
+ the init operation. As the ServiceLauncher only creates
+ an instance of the base {@link Configuration} class, it is
+ recommended to instantiate any subclass (such as YarnConfiguration)
+ that injects new resources.
+
+ @param config the initial configuration build up by the
+ service launcher.
+ @param args list of arguments passed to the command line
+ after any launcher-specific commands have been stripped.
+ @return the configuration to init the service with.
+ Recommended: pass down the config parameter with any changes
+ @throws Exception any problem]]>
+
+
+
+
+
+
+ The return value becomes the exit code of the launched process.
+
+ If an exception is raised, the policy is:
+
+
Any subclass of {@link org.apache.hadoop.util.ExitUtil.ExitException}:
+ the exception is passed up unmodified.
+
+
Any exception which implements
+ {@link org.apache.hadoop.util.ExitCodeProvider}:
+ A new {@link ServiceLaunchException} is created with the exit code
+ and message of the thrown exception; the thrown exception becomes the
+ cause.
+
Any other exception: a new {@link ServiceLaunchException} is created
+ with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and
+ the message of the original exception (which becomes the cause).
+
+ @return the exit code
+ @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed
+ up as the exit code and error text.
+ @throws Exception any exception to report. If it provides an exit code
+ this is used in a wrapping exception.]]>
+
+
+
+
+ The command line options will be passed down before the
+ {@link Service#init(Configuration)} operation is invoked via an
+ invocation of {@link LaunchableService#bindArgs(Configuration, List)}
+ After the service has been successfully started via {@link Service#start()}
+ the {@link LaunchableService#execute()} method is called to execute the
+ service. When this method returns, the service launcher will exit, using
+ the return code from the method as its exit option.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 400 Bad Request}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 401 Unauthorized}]]>
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 403: Forbidden}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 404: Not Found}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 405: Not allowed}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 406: Not Acceptable}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 408: Request Timeout}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 409: Conflict}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 500 Internal Server Error}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 501: Not Implemented}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 503 Service Unavailable}]]>
+
+
+
+
+
+ If raised, this is expected to be raised server-side and likely due
+ to client/server version incompatibilities.
+
+ Approximate HTTP equivalent: {@code 505: Version Not Supported}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Codes with a YARN prefix are YARN-related.
+
+ Many of the exit codes are designed to resemble HTTP error codes,
+ squashed into a single byte. e.g. 44, "not found" is the equivalent
+ of 404. The various 2XX HTTP error codes aren't followed;
+ the Unix standard of "0" for success is used.
+
+ 0-10: general command issues
+ 30-39: equivalent to the 3XX responses, where those responses are
+ considered errors by the application.
+ 40-49: client-side/CLI/config problems
+ 50-59: service-side problems.
+ 60+ : application specific error codes
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This uses {@link String#format(String, Object...)}
+ to build the formatted exception in the ENGLISH locale.
+
+ If the last argument is a throwable, it becomes the cause of the exception.
+ It will also be used as a parameter for the format.
+ @param exitCode exit code
+ @param format format for message to use in exception
+ @param args list of arguments]]>
+
+
+
+
+ When caught by the ServiceLauncher, it will convert that
+ into a process exit code.
+
+ The {@link #ServiceLaunchException(int, String, Object...)} constructor
+ generates formatted exceptions.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Clients and/or applications can use the provided Progressable
+ to explicitly report progress to the Hadoop framework. This is especially
+ important for operations which take a significant amount of time since,
+ in lieu of the reported progress, the framework has to assume that an error
+ has occurred and time out the operation.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Class is to be obtained
+ @return the correctly typed Class of the given object.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ kill -0 command or equivalent]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param parent File parent directory
+ @param basename String script file basename
+ @return File referencing the script in the directory]]>
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param basename String script file basename
+ @return String script file name]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ IOException.
+ @return the path to {@link #WINUTILS_EXE}
+ @throws RuntimeException if the path is not resolvable]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell.
+ @return the thread that ran runCommand() that spawned this shell
+ or null if no thread is waiting for this shell to complete]]>
+
+
+
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param cmd shell command to execute.
+ @return the output of the executed command.]]>
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @param timeout time in milliseconds after which script should be marked timeout
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+ Shell processes.
+ Iterates through a map of all currently running Shell
+ processes and destroys them one by one. This method is thread safe]]>
+
+
+
+
+ Shell objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CreateProcess synchronization object.]]>
+
+
+
+
+ os.name property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Important: caller must check for this value being null.
+ The lack of such checks has led to many support issues being raised.
+
+ @deprecated use one of the exception-raising getter methods,
+ specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell can be used to run shell commands like du or
+ df. It also offers facilities to gate commands by
+ time-intervals.]]>
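+
+ A hedged sketch of the static helpers described above; the commands and the
+ environment are examples only, and IOException handling is omitted:
+
+   String df = Shell.execCommand("df", "-h");
+
+   Map<String, String> env = new HashMap<>();
+   env.put("LC_ALL", "C");
+   // Variant with an environment and a timeout in milliseconds.
+   String du = Shell.execCommand(env, new String[] {"du", "-s", "/tmp"}, 10000L);
+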
+
+
+
+
+
+
+
+ ShutdownHookManager singleton.
+
+ @return ShutdownHookManager singleton.]]>
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook.]]>
+
+
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook
+ @param timeout timeout of the shutdownHook
+ @param unit unit of the timeout TimeUnit]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ShutdownHookManager enables running shutdownHook
+ in a deterministic order, higher priority first.
+
+ The JVM runs ShutdownHooks in a non-deterministic order or in parallel.
+ This class registers a single JVM shutdownHook and runs all the
+ shutdownHooks registered to it (to this class) in order based on their
+ priority.
+
+ Unless a hook was registered with a shutdown timeout explicitly set through
+ {@link #addShutdownHook(Runnable, int, long, TimeUnit)},
+ the shutdown time allocated to it is set by the configuration option
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in
+ {@code core-site.xml}, with a default value of
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT}
+ seconds.]]>
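+
+ A hedged usage sketch based on the description above; the hook bodies and
+ priorities are illustrative:
+
+   ShutdownHookManager.get().addShutdownHook(() -> {
+     // flush buffers, close connections, etc.
+   }, 30);
+
+   // With an explicit per-hook timeout instead of the configured default.
+   ShutdownHookManager.get().addShutdownHook(
+       () -> { /* cleanup */ }, 30, 10, TimeUnit.SECONDS);
+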
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Tool, is the standard for any Map-Reduce tool/application.
+ The tool/application should delegate the handling of
+
+ standard command-line options to {@link ToolRunner#run(Tool, String[])}
+ and only handle its custom arguments.
+
+
Here is how a typical Tool is implemented:
+
+ public class MyApp extends Configured implements Tool {
+
+ public int run(String[] args) throws Exception {
+ // Configuration processed by ToolRunner
+ Configuration conf = getConf();
+
+ // Create a JobConf using the processed conf
+ JobConf job = new JobConf(conf, MyApp.class);
+
+ // Process custom command-line options
+ Path in = new Path(args[1]);
+ Path out = new Path(args[2]);
+
+ // Specify various job-specific parameters
+ job.setJobName("my-app");
+ job.setInputPath(in);
+ job.setOutputPath(out);
+ job.setMapperClass(MyMapper.class);
+ job.setReducerClass(MyReducer.class);
+
+ // Submit the job, then poll for progress until the job is complete
+ RunningJob runningJob = JobClient.runJob(job);
+ if (runningJob.isSuccessful()) {
+ return 0;
+ } else {
+ return 1;
+ }
+ }
+
+ public static void main(String[] args) throws Exception {
+ // Let ToolRunner handle generic command-line options
+ int res = ToolRunner.run(new Configuration(), new MyApp(), args);
+
+ System.exit(res);
+ }
+ }
+
+
+ @see GenericOptionsParser
+ @see ToolRunner]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Tool by {@link Tool#run(String[])}, after
+ parsing with the given generic arguments. Uses the given
+ Configuration, or builds one if null.
+
+ Sets the Tool's configuration with the possibly modified
+ version of the conf.
+
+ @param conf Configuration for the Tool.
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+ Tool with its Configuration.
+
+ Equivalent to run(tool.getConf(), tool, args).
+
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ToolRunner can be used to run classes implementing the
+ Tool interface. It works in conjunction with
+ {@link GenericOptionsParser} to parse the
+
+ generic hadoop command line arguments and modifies the
+ Configuration of the Tool. The
+ application-specific options are passed along without being modified.
+
+
+ @see Tool
+ @see GenericOptionsParser]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Bloom filter, as defined by Bloom in 1970.
+
+ The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
+ the networking research community in the past decade thanks to the bandwidth efficiencies that it
+ offers for the transmission of set membership information between networked hosts. A sender encodes
+ the information into a bit vector, the Bloom filter, that is more compact than a conventional
+ representation. Computation and space costs for construction are linear in the number of elements.
+ The receiver uses the filter to test whether various elements are members of the set. Though the
+ filter will occasionally return a false positive, it will never return a false negative. When creating
+ the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
+
+
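+
+ A hedged usage sketch (not part of the original description); the sizing
+ parameters are arbitrary examples:
+
+   BloomFilter filter = new BloomFilter(10000 /* vectorSize */,
+       4 /* nbHash */, Hash.MURMUR_HASH);
+   Key key = new Key("alice".getBytes(StandardCharsets.UTF_8));
+   filter.add(key);
+   // true for added keys; may also be true for others (false positive),
+   // but never false for a key that was added.
+   boolean maybePresent = filter.membershipTest(key);
+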
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this counting Bloom filter.
+
+ Invariant: nothing happens if the specified key does not belong to this counting Bloom filter.
+ @param key The key to remove.]]>
+
+
+
+
+
+
+
+
+
+
+
+ key -> count map.
+
NOTE: due to the bucket size of this filter, inserting the same
+ key more than 15 times will cause an overflow at all filter positions
+ associated with this key, and it will significantly increase the error
+ rate for this and other keys. For this reason the filter can only be
+ used to store small count values 0 <= N << 15.
+ @param key key to be tested
+ @return 0 if the key is not present. Otherwise, a positive value v will
+ be returned such that v == count with probability equal to the
+ error rate of this filter, and v > count otherwise.
+ Additionally, if the filter experienced an underflow as a result of
+ {@link #delete(Key)} operation, the return value may be lower than the
+ count with the probability of the false negative rate of such
+ filter.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ counting Bloom filter, as defined by Fan et al. in a ToN
+ 2000 paper.
+
+ A counting Bloom filter is an improvement to a standard Bloom filter as it
+ allows dynamic additions and deletions of set membership information. This
+ is achieved through the use of a counting vector instead of a bit vector.
+
+
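+
+ A hedged sketch of dynamic addition and deletion, reusing the constructor
+ parameters from the plain Bloom filter example above:
+
+   CountingBloomFilter cbf = new CountingBloomFilter(10000, 4, Hash.MURMUR_HASH);
+   Key k = new Key("alice".getBytes(StandardCharsets.UTF_8));
+   cbf.add(k);
+   int approx = cbf.approximateCount(k); // about 1, subject to the caveats above
+   cbf.delete(k);                        // deletion is possible, unlike BloomFilter
+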
+
+
+
+
+
+
+
+
+
+
+
+ Builds an empty Dynamic Bloom filter.
+ @param vectorSize The number of bits in the vector.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).
+ @param nr The threshold for the maximum number of keys to record in a
+ dynamic Bloom filter row.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dynamic Bloom filter, as defined in the INFOCOM 2006 paper.
+
+ A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but
+ each of the s rows is a standard Bloom filter. The creation
+ process of a DBF is iterative. At the start, the DBF is a 1 * m
+ bit matrix, i.e., it is composed of a single standard Bloom filter.
+ It assumes that nr elements are recorded in the
+ initial bit vector, where nr <= n (n is
+ the cardinality of the set A to record in the filter).
+
+ As the size of A grows during the execution of the application,
+ several keys must be inserted in the DBF. When inserting a key into the DBF,
+ one must first get an active Bloom filter in the matrix. A Bloom filter is
+ active when the number of recorded keys, nr, is
+ strictly less than the current cardinality of A, n.
+ If an active Bloom filter is found, the key is inserted and
+ nr is incremented by one. On the other hand, if there
+ is no active Bloom filter, a new one is created (i.e., a new row is added to
+ the matrix) according to the current size of A and the element
+ is added in this new Bloom filter and the nr value of
+ this new Bloom filter is set to one. A given key is said to belong to the
+ DBF if the k positions are set to one in one of the matrix rows.
+
+
+
+
+
+
+
+
+ Builds a hash function that must obey a given maximum number of returned values and a highest value.
+ @param maxValue The maximum highest returned value.
+ @param nbHash The number of resulting hashed values.
+ @param hashType type of the hashing function (see {@link Hash}).]]>
+
+
+
+
+ this hash function. A NOOP]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The idea is to randomly select a bit to reset.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will generate the minimum
+ number of false negatives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will remove the maximum number
+ of false positives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will, at the same time, remove
+ the maximum number of false positives while minimizing the number of false
+ negatives generated.]]>
+
+
+
+
+ Originally created by
+ European Commission One-Lab Project 034819.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash function to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this retouched Bloom filter.
+
+ Invariant: if the false positive is null, nothing happens.
+ @param key The false positive key to add.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param coll The collection of false positives.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The list of false positives.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The array of false positives.]]>
+
+
+
+
+
+
+ this retouched Bloom filter.
+ @param scheme The selective clearing scheme to apply.]]>
+
+
+
+
+
+
+
+
+
+
+
+ retouched Bloom filter, as defined in the CoNEXT 2006 paper.
+
+ It allows the removal of selected false positives at the cost of introducing
+ random false negatives, and with the benefit of eliminating some random false
+ positives at the same time.
+
+
+
+
+
+
+
+
+
+
+
diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.3.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.3.xml
new file mode 100644
index 0000000000000..448df9ddd686b
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.3.xml
@@ -0,0 +1,39037 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key to be deprecated
+ @param newKeys list of keys that take up the values of deprecated key
+ @param customMessage deprecation message
+ @deprecated use {@link #addDeprecation(String key, String newKey,
+ String customMessage)} instead]]>
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key to be deprecated
+ @param newKey key that take up the values of deprecated key
+ @param customMessage deprecation message]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKeys list of keys that take up the values of deprecated key
+ @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKey key that takes up the value of deprecated key]]>
+
+
+
+
+
+ key is deprecated.
+
+ @param key the parameter which is to be checked for deprecation
+ @return true if the key is deprecated and
+ false otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param name resource to be added, the classpath is examined for a file
+ with that name.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param url url of the resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param file file-path of resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ WARNING: The contents of the InputStream will be cached, by this method.
+ So use this sparingly because it does increase the memory consumption.
+
+ @param in InputStream to deserialize the object from. In will be read from
+ when a get or set is called next. After it is read the stream will be
+ closed.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param in InputStream to deserialize the object from.
+ @param name the name of the resource because InputStream.toString is not
+ very descriptive some times.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param conf Configuration object from which to load properties]]>
+
+
+
+
+
+
+
+
+
+
+ name property, null if
+ no such property exists. If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null.
+
+ Values are processed for variable expansion
+ before being returned.
+
+ As a side effect get loads the properties from the sources if called for
+ the first time as a lazy init.
+
+ @param name the property name, will be trimmed before get value.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property, but only for
+ names which have no valid value, usually non-existent or commented
+ out in XML.
+
+ @param name the property name
+ @return true if the property name exists without value]]>
+
+
+
+
+
+ name property as a trimmed String,
+ null if no such property exists.
+ If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null
+
+ Values are processed for variable expansion
+ before being returned.
+
+ @param name the property name.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+ name property as a trimmed String,
+ defaultValue if no such property exists.
+ See @{Configuration#getTrimmed} for more details.
+
+ @param name the property name.
+ @param defaultValue the property default value.
+ @return the value of the name or defaultValue
+ if it is not set.]]>
+
+
+
+
+
+ name property, without doing
+ variable expansion.If the key is
+ deprecated, it returns the value of the first key which replaces
+ the deprecated key and is not null.
+
+ @param name the property name.
+ @return the value of the name property or
+ its replacing property and null if no such property exists.]]>
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated or there is a deprecated name associated to it,
+ it sets the value to both names. Name will be trimmed before put into
+ configuration.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated, it also sets the value to
+ the keys that replace the deprecated key. Name will be trimmed before put
+ into configuration.
+
+ @param name property name.
+ @param value property value.
+ @param source the place that this configuration value came from
+ (For debugging).
+ @throws IllegalArgumentException when the value or name is null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name. If the key is deprecated,
+ it returns the value of the first key which replaces the deprecated key
+ and is not null.
+ If no such property exists,
+ then defaultValue is returned.
+
+ @param name property name, will be trimmed before get value.
+ @param defaultValue default value.
+ @return property value, or defaultValue if the property
+ doesn't exist.]]>
+
+
+
+
+
+
+ name property as an int.
+
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid int,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as an int,
+ or defaultValue.]]>
+
+
+
+
+
+ name property as a set of comma-delimited
+ int values.
+
+ If no such property exists, an empty array is returned.
+
+ @param name property name
+ @return property value interpreted as an array of comma-delimited
+ int values]]>
+
+
+
+
+
+
+ name property to an int.
+
+ @param name property name.
+ @param value int value of the property.]]>
+
+
+
+
+
+
+ name property as a long.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid long,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a long or
+ human readable format. If no such property exists, the provided default
+ value is returned, or if the specified value is not a valid
+ long or human readable format, then an error is thrown. You
+ can use the following suffix (case insensitive): k(kilo), m(mega), g(giga),
+ t(tera), p(peta), e(exa)
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a long.
+
+ @param name property name.
+ @param value long value of the property.]]>
+
+
+
+
+
+
+ name property as a float.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid float,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a float,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a float.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a double.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid double,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a double,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a double.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a boolean.
+ If no such property is specified, or if the specified value is not a valid
+ boolean, then defaultValue is returned.
+
+ @param name property name.
+ @param defaultValue default value.
+ @return property value as a boolean,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a boolean.
+
+ @param name property name.
+ @param value boolean value of the property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property to the given type. This
+ is equivalent to set(<name>, value.toString()).
+ @param name property name
+ @param value new value
+ @param enumeration type]]>
+
+
+
+
+
+
+ enumeration type
+ @throws IllegalArgumentException If mapping is illegal for the type
+ provided
+ @return enumeration type]]>
+
+
+
+
+
+
+
+ name to the given time duration. This
+ is equivalent to set(<name>, value + <time suffix>).
+ @param name Property name
+ @param value Time duration
+ @param unit Unit of time]]>
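+
+ A hedged sketch of the duration setter and its matching getter; the property
+ name is a placeholder:
+
+   Configuration conf = new Configuration();
+   conf.setTimeDuration("example.refresh.interval", 5, TimeUnit.MINUTES);
+   // Returns 300; the default (30) would apply only if the property were unset.
+   long seconds = conf.getTimeDuration("example.refresh.interval", 30, TimeUnit.SECONDS);
+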
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as a Pattern.
+ If no such property is specified, or if the specified value is not a valid
+ Pattern, then defaultValue is returned.
+ Note that the returned value is NOT trimmed by this method.
+
+ @param name property name
+ @param defaultValue default value
+ @return property value as a compiled Pattern, or defaultValue]]>
+
+
+
+
+
+
+ Pattern.
+ If the pattern is passed as null, sets the empty pattern which results in
+ further calls to getPattern(...) returning the default value.
+
+ @param name property name
+ @param pattern new value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as
+ a collection of Strings.
+ If no such property is specified then empty collection is returned.
+
+ This is an optimized version of {@link #getStrings(String)}
+
+ @param name property name.
+ @return property value as a collection of Strings.]]>
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then null is returned.
+
+ @param name property name.
+ @return property value as an array of Strings,
+ or null.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of Strings,
+ or default value.]]>
+
+
+
+
+
+ name property as
+ a collection of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then empty Collection is returned.
+
+ @param name property name.
+ @return property value as a collection of Strings, or empty Collection]]>
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then an empty array is returned.
+
+ @param name property name.
+ @return property value as an array of trimmed Strings,
+ or empty array.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of trimmed Strings,
+ or default value.]]>
+
+
+
+
+
+
+ name property as
+ comma delimited values.
+
+ @param name property name.
+ @param values The values]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostProperty as a
+ InetSocketAddress. If hostProperty is
+ null, addressProperty will be used. This
+ is useful for cases where we want to differentiate between host
+ bind address and address clients should use to establish connection.
+
+ @param hostProperty bind host property name.
+ @param addressProperty address property name.
+ @param defaultAddressValue the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+
+ name property as a
+ InetSocketAddress.
+ @param name property name.
+ @param defaultAddress the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+ name property as
+ a host:port.]]>
+
+
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address. If the host and address
+ properties are configured the host component of the address will be combined
+ with the port component of the addr to generate the address. This is to allow
+ optional control over which host name is used in multi-home bind-host
+ cases where a host can have multiple names
+ @param hostProperty the bind-host configuration name
+ @param addressProperty the service address configuration name
+ @param defaultAddressValue the service default address configuration value
+ @param addr InetSocketAddress of the service listener
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address.
+ @param name property name.
+ @param addr InetSocketAddress of a listener to store in the given property
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property
+ as an array of Class.
+ The value of the property specifies a list of comma separated class names.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the property name.
+ @param defaultValue default value.
+ @return property value as a Class[],
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a Class.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the conf key name.
+ @param defaultValue default value.
+ @return property value as a Class,
+ or defaultValue.]]>
+
+
+
+
+
+
+
+ name property as a Class
+ implementing the interface specified by xface.
+
+ If no such property is specified, then defaultValue is
+ returned.
+
+ An exception is thrown if the returned class does not implement the named
+ interface.
+
+ @param name the conf key name.
+ @param defaultValue default value.
+ @param xface the interface implemented by the named class.
+ @return property value as a Class,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a List
+ of objects implementing the interface specified by xface.
+
+ An exception is thrown if any of the classes does not exist, or if it does
+ not implement the named interface.
+
+ @param name the property name.
+ @param xface the interface implemented by the classes named by
+ name.
+ @return a List of objects implementing xface.]]>
+
+
+
+
+
+
+
+ name property to the name of a
+ theClass implementing the given interface xface.
+
+ An exception is thrown if theClass does not implement the
+ interface xface.
+
+ @param name property name.
+ @param theClass property value.
+ @param xface the interface implemented by the named class.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.]]>
+
+
+
+
+
+
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return an input stream attached to the resource.]]>
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return a reader attached to the resource.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ String
+ key-value pairs in the configuration.
+
+ @return an iterator over the entries.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When property name is not empty and the property exists in the
+ configuration, this method writes the property and its attributes
+ to the {@link Writer}.
+
+
+
+ When property name is null or empty, this method writes all the
+ configuration properties and their attributes to the {@link Writer}.
+
+
+
+ When property name is not empty but the property doesn't exist in
+ the configuration, this method throws an {@link IllegalArgumentException}.
+
+
+ @param out the writer to write to.]]>
+
+
+
+
+
+
+
+
+
+
+ When propertyName is not empty, and the property exists
+ in the configuration, the format of the output would be,
+
+ When propertyName is not empty, and the property is not
+ found in the configuration, this method will throw an
+ {@link IllegalArgumentException}.
+
+
+
+ @param config the configuration
+ @param propertyName property name
+ @param out the Writer to write to
+ @throws IOException
+ @throws IllegalArgumentException when property name is not
+ empty and the property is not found in configuration]]>
+
+
+
+
+
+
+
+
+ { "properties" :
+ [ { key : "key1",
+ value : "value1",
+ isFinal : "key1.isFinal",
+ resource : "key1.resource" },
+ { key : "key2",
+ value : "value2",
+ isFinal : "key2.isFinal",
+ resource : "key2.resource" }
+ ]
+ }
+
+
+ It does not output the properties of the configuration object which
+ is loaded from an input stream.
+
+
+ @param config the configuration
+ @param out the Writer to write to
+ @throws IOException]]>
+
Configurations are specified by resources. A resource contains a set of
+ name/value pairs as XML data. Each resource is named by either a
+ String or by a {@link Path}. If named by a String,
+ then the classpath is examined for a file with that name. If named by a
+ Path, then the local filesystem is examined directly, without
+ referring to the classpath.
+
+
Unless explicitly turned off, Hadoop by default specifies two
+ resources, loaded in-order from the classpath:
 core-default.xml: Read-only defaults for hadoop.
 core-site.xml: Site-specific configuration for a given hadoop
+ installation.
+
+ Applications may add additional resources, which are loaded
+ subsequent to these resources in the order they are added.
+
+
Final Parameters
+
+
Configuration parameters may be declared final.
+ Once a resource declares a value final, no subsequently-loaded
+ resource can alter that value.
+ For example, one might define a final parameter with:
+
When conf.get("tempdir") is called, then ${basedir}
+ will be resolved to another property in this Configuration, while
+ ${user.name} would then ordinarily be resolved to the value
+ of the System property with that name.
+
When conf.get("otherdir") is called, then ${env.BASE_DIR}
+ will be resolved to the value of the ${BASE_DIR} environment variable.
+ It supports ${env.NAME:-default} and ${env.NAME-default} notations.
+ The former is resolved to "default" if ${NAME} environment variable is undefined
+ or its value is empty.
+ The latter behaves the same way only if ${NAME} is undefined.
+
By default, warnings will be given for any deprecated configuration
+ parameters and these are suppressible by configuring
+ log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in the
+ log4j.properties file.
+
+
Tags
+
+
Optionally we can tag related properties together by using tag
+ attributes. System tags are defined by the hadoop.tags.system property. Users
+ can define their own custom tags in the hadoop.tags.custom property.
+
+
Properties marked with tags can be retrieved with conf
+ .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags
+ (Arrays.asList("YARN","SECURITY")).
]]>
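+
+ A minimal usage sketch of the Configuration API described above. The resource
+ name and property keys are hypothetical and used only for illustration; it
+ assumes org.apache.hadoop.conf.Configuration and java.util.regex.Pattern are
+ imported.
+
+   Configuration conf = new Configuration();
+   conf.addResource("my-site.xml");                         // hypothetical extra resource on the classpath
+   conf.set("basedir", "/tmp/work");
+   conf.set("tempdir", "${basedir}/scratch-${user.name}");  // variable expansion
+   String tempDir = conf.get("tempdir");                    // e.g. /tmp/work/scratch-alice
+   String[] hosts = conf.getTrimmedStrings("my.hosts");     // comma separated values, trimmed
+   Pattern p = conf.getPattern("my.regex", Pattern.compile(".*"));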
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #createKey(String, byte[], Options)} method.
+
+ @param name the base name of the key
+ @param options the options for the new key.
+ @return the version name of the first version of the key.
+ @throws IOException
+ @throws NoSuchAlgorithmException]]>
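+
+ A hedged sketch of key creation through this API. The key name, cipher and
+ description are hypothetical, and it assumes at least one KeyProvider has been
+ configured (for example via hadoop.security.key.provider.path):
+
+   Configuration conf = new Configuration();
+   List<KeyProvider> providers = KeyProviderFactory.getProviders(conf);
+   KeyProvider provider = providers.get(0);        // assumes a provider is configured
+   KeyProvider.Options options = KeyProvider.options(conf)
+       .setCipher("AES/CTR/NoPadding")
+       .setBitLength(128)
+       .setDescription("example key");             // hypothetical metadata
+   KeyProvider.KeyVersion kv = provider.createKey("mykey", options);
+   provider.flush();                               // persist the new key material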
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #rollNewVersion(String, byte[])} method.
+
+ @param name the basename of the key
+ @return the name of the new version of the key
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KeyProvider implementations must be thread safe.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ NULL if
+ a provider for the specified URI scheme could not be found.
+ @throws IOException thrown if the provider failed to initialize.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri has syntax error]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri is
+ not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri
+ determines a configuration property name,
+ fs.AbstractFileSystem.scheme.impl whose value names the
+ AbstractFileSystem class.
+
+ The entire URI and conf are passed to the AbstractFileSystem factory method.
+
+ @param uri for the file system to be created.
+ @param conf which is passed to the file system impl.
+
+ @return file system for the given URI.
+
+ @throws UnsupportedFileSystemException if the file system for
+ uri is not supported.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations such as HDFS, metadata
+ synchronization is essential to guarantee consistency of read requests,
+ particularly in an HA setting.
+ @throws IOException
+ @throws UnsupportedOperationException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ } describing entries to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing modifications, must
+ include entries for user, group, and others for compatibility with
+ permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ } which returns each AclStatus
+ @throws IOException if an ACL could not be read]]>
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ BlockLocation(offset: 0, length: BLOCK_SIZE,
+ hosts: {"host1:9866", "host2:9866", "host3:9866"})
+
+
+ And if the file is erasure-coded, each BlockLocation represents a logical
+ block group. Value offset is the offset of a block group in the file and
+ value length is the total length of a block group. Hosts of a BlockLocation
+ are the datanodes that hold all the data blocks and parity blocks of a
+ block group.
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ A BlockLocation example will be like:
+
+
+ Please refer to
+ {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or
+ {@link FileContext#getFileBlockLocations(Path, long, long)}
+ for more examples.]]>
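+
+ A short sketch of retrieving block locations for a file; the path is
+ hypothetical and fs is assumed to be an initialized FileSystem:
+
+   FileStatus stat = fs.getFileStatus(new Path("/data/part-00000"));
+   BlockLocation[] locations = fs.getFileBlockLocations(stat, 0, stat.getLen());
+   for (BlockLocation loc : locations) {
+     System.out.println(loc.getOffset() + "+" + loc.getLength()
+         + " -> " + Arrays.toString(loc.getHosts()));
+   }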
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ After a successful call, {@code buf.position()} will be advanced by the
+ number of bytes read and {@code buf.limit()} will be unchanged.
+
+ In the case of an exception, the state of the buffer (the contents of the
+ buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is
+ undefined, and callers should be prepared to recover from this
+ eventuality.
+
+ Callers should use {@link StreamCapabilities#hasCapability(String)} with
+ {@link StreamCapabilities#PREADBYTEBUFFER} to check if the underlying
+ stream supports this interface, otherwise they might get a
+ {@link UnsupportedOperationException}.
+
+ Implementations should treat 0-length requests as legitimate, and must not
+ signal an error upon their receipt.
+
+ This does not change the current offset of a file, and is thread-safe.
+
+ @param position position within file
+ @param buf the ByteBuffer to receive the results of the read operation.
+ @return the number of bytes read, possibly zero, or -1 if reached
+ end-of-stream
+ @throws IOException if there is some error performing the read]]>
+
+
+
+
+
+
+
+
+ This operation provides similar semantics to
+ {@link #read(long, ByteBuffer)}, the difference is that this method is
+ guaranteed to read data until the {@link ByteBuffer} is full, or until
+ the end of the data stream is reached.
+
+ @param position position within file
+ @param buf the ByteBuffer to receive the results of the read operation.
+ @throws IOException if there is some error performing the read
+ @throws EOFException the end of the data was reached before
+ the read operation completed
+ @see #read(long, ByteBuffer)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ After a successful call, {@code buf.position()} will be advanced by the
+ number of bytes read and {@code buf.limit()} will be unchanged.
+
+ In the case of an exception, the state of the buffer (the contents of the
+ buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is
+ undefined, and callers should be prepared to recover from this
+ eventuality.
+
+ Callers should use {@link StreamCapabilities#hasCapability(String)} with
+ {@link StreamCapabilities#READBYTEBUFFER} to check if the underlying
+ stream supports this interface, otherwise they might get a
+ {@link UnsupportedOperationException}.
+
+ Implementations should treat 0-length requests as legitimate, and must not
+ signal an error upon their receipt.
+
+ @param buf
+ the ByteBuffer to receive the results of the read operation.
+ @return the number of bytes read, possibly zero, or -1 if
+ reach end-of-stream
+ @throws IOException
+ if there is some error performing the read]]>
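+
+ A sketch of probing for the capability before taking the ByteBuffer read path;
+ fs and path are assumed to be an initialized FileSystem and an existing file:
+
+   try (FSDataInputStream in = fs.open(path)) {
+     if (in.hasCapability(StreamCapabilities.READBYTEBUFFER)) {
+       ByteBuffer buf = ByteBuffer.allocate(8192);
+       int read = in.read(buf);        // reads into the buffer, advancing its position
+     } else {
+       byte[] bytes = new byte[8192];
+       int read = in.read(bytes);      // plain InputStream fallback
+     }
+   }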
+
CREATE - to create a file if it does not exist,
+ else throw FileAlreadyExistsException.
+
APPEND - to append to a file if it exists,
+ else throw FileNotFoundException.
+
OVERWRITE - to truncate a file if it exists,
+ else throw FileNotFoundException.
+
CREATE|APPEND - to create a file if it does not exist,
+ else append to an existing file.
+
CREATE|OVERWRITE - to create a file if it does not exist,
+ else overwrite an existing file.
+
SYNC_BLOCK - to force closed blocks to the disk device.
+ In addition {@link Syncable#hsync()} should be called after each write,
+ if true synchronous behavior is required.
+
LAZY_PERSIST - Create the block on transient storage (RAM) if
+ available.
+
APPEND_NEWBLOCK - Append data to a new block instead of end of the last
+ partial block.
+
+
+ The following combinations are not valid and will result in
+ {@link HadoopIllegalArgumentException}:
+
+
]]>
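+
+ A sketch of passing these flags when creating a file through FileContext; the
+ path is hypothetical:
+
+   FileContext fc = FileContext.getFileContext();
+   Path file = new Path("/user/alice/output.txt");
+   try (FSDataOutputStream out = fc.create(file,
+       EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
+       Options.CreateOpts.createParent())) {
+     out.writeUTF("hello");
+   }
+
+ CREATE|OVERWRITE creates the file if it is missing and truncates it otherwise,
+ which is usually what jobs writing replaceable output want.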
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws AccessControlException if access denied
+ @throws IOException If an IO Error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Progress - to report progress on the operation - default null
+
Permission - umask is applied against permission: default is
+ FsPermission.getDefault()
+
+
CreateParent - create missing parent path; default is not
+ to create parents
+
The defaults for the following are the server-side defaults of the file
+ server implementing the target path. Not all parameters make sense
+ for all kinds of file system - e.g. the local FS ignores Blocksize,
+ replication, checksum
+
+
BufferSize - buffersize used in FSDataOutputStream
+
Blocksize - block size for file blocks
+
ReplicationFactor - replication for blocks
+
ChecksumParam - Checksum parameters. server default is used
+ if not specified.
+
+
+
+ @return {@link FSDataOutputStream} for created file
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file f already exists
+ @throws FileNotFoundException If parent of f does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of f is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dir already
+ exists
+ @throws FileNotFoundException If parent of dir does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of dir is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for dir
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path dir is not valid]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is invalid]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+
+ @throws AccessControlException If access is denied
+ @throws FileNotFoundException If file f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Fails if src is a file and dst is a directory.
+
Fails if src is a directory and dst is a file.
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails if the dst
+ already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites the dst if
+ it is a file or an empty directory. Rename fails if dst is a non-empty
+ directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for details
+
+
+ @param src path to be renamed
+ @param dst new path after rename
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If dst already exists and
+ options has {@link Options.Rename#OVERWRITE}
+ option false.
+ @throws FileNotFoundException If src does not exist
+ @throws ParentNotDirectoryException If parent of dst is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for src
+ and dst is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
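+
+ A sketch of a rename that replaces an existing destination; both paths are
+ hypothetical:
+
+   FileContext fc = FileContext.getFileContext();
+   fc.rename(new Path("/data/staging/part-0"),
+             new Path("/data/final/part-0"),
+             Options.Rename.OVERWRITE);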
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws HadoopIllegalArgumentException If username or
+ groupname is invalid.]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If the given path does not refer to a symlink
+ or an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Given a path referring to a symlink of form:
+
+ {@literal <---}X{@literal --->}
+ fs://host/A/B/link
+ {@literal <-----}Y{@literal ----->}
+
+ In this path X is the scheme and authority that identify the file system,
+ and Y is the path leading up to the final path component "link". If Y is
+ a symlink itself then let Y' be the target of Y and X' be the scheme and
+ authority of Y'. Symlink targets may be:
+
+ 1. Fully qualified URIs
+
+ fs://hostX/A/B/file Resolved according to the target file system.
+
+ 2. Partially qualified URIs (eg scheme but no host)
+
+ fs:///A/B/file Resolved according to the target file system. Eg resolving
+ a symlink to hdfs:///A results in an exception because
+ HDFS URIs must be fully qualified, while a symlink to
+ file:///A will not since Hadoop's local file systems
+ require partially qualified URIs.
+
+ 3. Relative paths
+
+ path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
+ is "../B/file" then [Y'][path] is hdfs://host/B/file
+
+ 4. Absolute paths
+
+ path Resolves to [X'][path]. Eg if Y resolves to hdfs://host/A/B and path
+ is "/file" then [X'][path] is hdfs://host/file
+
+
+ @param target the target of the symbolic link
+ @param link the path to be created that points to target
+ @param createParent if true then missing parent dirs are created if
+ false then parent must exist
+
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file link already exists
+ @throws FileNotFoundException If target does not exist
+ @throws ParentNotDirectoryException If parent of link is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for
+ target or link is not supported
+ @throws IOException If an I/O error occurred]]>
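+
+ A sketch of creating a symlink; the target and link paths are hypothetical,
+ and not every file system supports symlinks, so an exception may be raised:
+
+   FileContext fc = FileContext.getFileContext();
+   fc.createSymlink(new Path("/user/alice/data/current"),   // target
+                    new Path("/user/alice/latest"),          // link
+                    true /* createParent */);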
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing
+ modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ } describing entries
+ to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing
+ modifications, must include entries for user, group, and others for
+ compatibility with permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ } which returns
+ each AclStatus
+ @throws IOException if an ACL could not be read]]>
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs
+ of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs
+ of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List{@literal <}String{@literal >} of the XAttr names of the
+ file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Path Names
+
+ The Hadoop file system supports a URI namespace and URI names. This enables
+ multiple types of file systems to be referenced using fully-qualified URIs.
+ Two common Hadoop file system implementations are
+
+
the local file system: file:///path
+
the HDFS file system: hdfs://nnAddress:nnPort/path
+
+
+ The Hadoop file system also supports additional naming schemes besides URIs.
+ Hadoop has the concept of a default file system, which implies a
+ default URI scheme and authority. This enables slash-relative names
+ relative to the default FS, which are more convenient for users and
+ application writers. The default FS is typically set by the user's
+ environment, though it can also be manually specified.
+
+
+ Hadoop also supports working-directory-relative names, which are paths
+ relative to the current working directory (similar to Unix). The working
+ directory can be in a different file system than the default FS.
+
+ Thus, Hadoop path names can be specified as one of the following:
+
+
a fully-qualified URI: scheme://authority/path (e.g.
+ hdfs://nnAddress:nnPort/foo/bar)
+
a slash-relative name: path relative to the default file system (e.g.
+ /foo/bar)
+
a working-directory-relative name: path relative to the working dir (e.g.
+ foo/bar)
+
+ Relative paths with scheme (scheme:foo/bar) are illegal.
+
+
Role of FileContext and Configuration Defaults
+
+ The FileContext is the analogue of per-process file-related state in Unix. It
+ contains two properties:
+
+
+
the default file system (for resolving slash-relative names)
+
the umask (for file permissions)
+
+ In general, these properties are obtained from the default configuration file
+ in the user's environment (see {@link Configuration}).
+
+ Further file system properties are specified on the server-side. File system
+ operations default to using these server-side defaults unless otherwise
+ specified.
+
+ The file system related server-side defaults are:
+
+
the home directory (default is "/user/userName")
+
the initial wd (only for local fs)
+
replication factor
+
block size
+
buffer size
+
encryptDataTransfer
+
checksum option. (checksumType and bytesPerChecksum)
+
+
+
Example Usage
+
+ Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
+ Unspecified values come from core-default.xml in the release jar.
+
+
myFContext = FileContext.getFileContext(); // uses the default config
+ // which has your default FS
+
myFContext.create(path, ...);
+
myFContext.setWorkingDir(path);
+
myFContext.open (path, ...);
+
...
+
+ Example 2: Get a FileContext with a specific URI as the default FS
+
+
myFContext = FileContext.getFileContext(URI);
+
myFContext.create(path, ...);
+
...
+
+ Example 3: FileContext with local file system as the default
+
+ If the configuration has the property
+ {@code "fs.$SCHEME.impl.disable.cache"} set to true,
+ a new instance will be created, initialized with the supplied URI and
+ configuration, then returned without being cached.
+
+
+ If there is a cached FS instance matching the same URI, it will
+ be returned.
+
+
+ Otherwise: a new FS instance will be created, initialized with the
+ configuration and URI, cached and returned to the caller.
+
+
+ @throws IOException if the FileSystem cannot be instantiated.]]>
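+
+ A sketch of bypassing the cache for one scheme; the namenode address is
+ hypothetical:
+
+   Configuration conf = new Configuration();
+   // Disable caching for the hdfs scheme so each get() returns a fresh instance.
+   conf.setBoolean("fs.hdfs.impl.disable.cache", true);
+   FileSystem fs = FileSystem.get(URI.create("hdfs://namenode.example.com:8020/"), conf);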
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ if f == null :
+ result = null
+ elif f.getLen() {@literal <=} start:
+ result = []
+ else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)]
+
+ This call is most helpful with a distributed filesystem
+ where the hostnames of machines that contain blocks of the given file
+ can be determined.
+
+ The default implementation returns an array containing one element:
+
+
+ And if a file is erasure-coded, the returned BlockLocations are logical
+ block groups.
+
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
+ there will be one BlockLocation returned, with 0 offset, actual file size
+ and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks.
+ 2. If the file size is less than one group size but greater than one
+ stripe size, then there will be one BlockLocation returned, with 0 offset,
+ actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting
+ the actual blocks.
+ 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
+ for example, then the result will be like:
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails
+ if the dst already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites
+ the dst if it is a file or an empty directory. Rename fails if dst is
+ a non-empty directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for
+ details. This default implementation is non atomic.
+
+ This method is deprecated since it is a temporary method added to
+ support the transition from FileSystem to FileContext for user
+ applications.
+
+ @param src path to be renamed
+ @param dst new path after rename
+ @throws FileNotFoundException src path does not exist, or the parent
+ path of dst does not exist.
+ @throws FileAlreadyExistsException dest path exists and is a file
+ @throws ParentNotDirectoryException if the parent path of dest is not
+ a directory
+ @throws IOException on failure]]>
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Clean shutdown of the JVM cannot be guaranteed.
+
The time to shut down a FileSystem will depend on the number of
+ files to delete. For filesystems where the cost of checking
+ for the existence of a file/directory and the actual delete operation
+ (for example: object stores) is high, the time to shut down the JVM can be
+ significantly extended by over-use of this feature.
+
Connectivity problems with a remote filesystem may delay shutdown
+ further, and may cause the files to not be deleted.
+
+ @param f the path to delete.
+ @return true if deleteOnExit is successful, otherwise false.
+ @throws IOException IO failure]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ Will not return null. Expect IOException upon access error.
+ @param f given path
+ @return the statuses of the files/directories in the given path
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param f
+ a path name
+ @param filter
+ the user-supplied path filter
+ @return an array of FileStatus objects for the files under the given path
+ after applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @return a list of statuses for the files under the given paths after
+ applying the filter default Path filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @param filter
+ the user-supplied path filter
+ @return a list of statuses for the files under the given paths after
+ applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+ Return all the files that match filePattern and are not checksum
+ files. Results are sorted by their names.
+
+
+ A filename pattern is composed of regular characters and
+ special pattern matching characters, which are:
+
+
+
+
+
?
+
Matches any single character.
+
+
+
*
+
Matches zero or more characters.
+
+
+
[abc]
+
Matches a single character from character set
+ {a,b,c}.
+
+
+
[a-b]
+
Matches a single character from the character range
+ {a...b}. Note that character a must be
+ lexicographically less than or equal to character b.
+
+
+
[^a]
+
Matches a single character that is not from character set or range
+ {a}. Note that the ^ character must occur
+ immediately to the right of the opening bracket.
+
+
+
\c
+
Removes (escapes) any special meaning of character c.
+
+
+
{ab,cd}
+
Matches a string from the string set {ab, cd}
+
+
+
{ab,c{de,fh}}
+
Matches a string from the string set {ab, cde, cfh}
+
+
+
+
+
+ @param pathPattern a glob specifying a path pattern
+
+ @return an array of paths that match the path pattern
+ @throws IOException IO failure]]>
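+
+ A sketch of expanding a glob; the pattern is hypothetical and fs is assumed to
+ be an initialized FileSystem:
+
+   FileStatus[] matches = fs.globStatus(new Path("/data/logs/2023-{01,02}-*/*.gz"));
+   for (FileStatus status : matches) {
+     System.out.println(status.getPath());
+   }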
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+ p does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ If the path is a directory,
+ if recursive is false, returns files in the directory;
+ if recursive is true, return files in the subtree rooted at the path.
+ If the path is a file, return the file's status and block locations.
+
+ @param f is the path
+ @param recursive if the subdirectories need to be traversed recursively
+
+ @return an iterator that traverses statuses of the files
+
+ @throws FileNotFoundException when the path does not exist;
+ @throws IOException see specific implementation]]>
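+
+ A sketch of a recursive listing; the root path is hypothetical and fs is
+ assumed to be an initialized FileSystem:
+
+   RemoteIterator<LocatedFileStatus> it = fs.listFiles(new Path("/data"), true);
+   while (it.hasNext()) {
+     LocatedFileStatus status = it.next();
+     System.out.println(status.getPath() + " " + status.getLen());
+   }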
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ undefined.
+ @throws IOException IO failure]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations such as HDFS metadata
+ synchronization is essential to guarantee consistency of read requests
+ particularly in HA setting.
+ @throws IOException
+ @throws UnsupportedOperationException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List{@literal } of the XAttr names of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is a default method which is intended to be overridden by
+ subclasses. The default implementation returns an empty storage statistics
+ object.
+
+ @return The StorageStatistics for this FileSystem instance.
+ Will never be null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ All user code that may potentially use the Hadoop Distributed
+ File System should be written to use a FileSystem object or its
+ successor, {@link FileContext}.
+
+
+ The local implementation is {@link LocalFileSystem} and the distributed
+ implementation is DistributedFileSystem. There are other implementations
+ for object stores and (outside the Apache Hadoop codebase)
+ third-party filesystems.
+
+ Notes
+
+
The behaviour of the filesystem is
+
+ specified in the Hadoop documentation.
+ However, the normative specification of the behavior of this class is
+ actually HDFS: if HDFS does not behave the way these Javadocs or
+ the specification in the Hadoop documentation defines, assume that
+ the documentation is incorrect.
+
+
The term {@code FileSystem} refers to an instance of this class.
+
The acronym "FS" is used as an abbreviation of FileSystem.
+
The term {@code filesystem} refers to the distributed/local filesystem
+ itself, rather than the class used to interact with it.
+
The term "file" refers to a file in the remote filesystem,
+ rather than instances of {@code java.io.File}.
+ Consult the filesystem specification document for the requirements
+ of an implementation of this interface.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Exceptions are caught and downgraded to debug logging.
+ @param source source of statistics.
+ @return a string for logging.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Whenever this object's toString() method is called, it evaluates the
+ statistics.
+
+ This is designed to be affordable to use in log statements.
+ @param source source of statistics -may be null.
+ @return an object whose toString() operation returns the current values.]]>
+
+
+
+
+
+
+ Whenever this object's toString() method is called, it evaluates the
+ statistics.
+
+ This is for use in log statements where the cost of creating
+ this entry is low; it is affordable to use in log statements.
+ @param statistics statistics to stringify -may be null.
+ @return an object whose toString() operation returns the current values.]]>
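+
+ A hedged sketch of the intended logging pattern, assuming "in" implements
+ IOStatisticsSource (FSDataInputStream does) and LOG is an SLF4J logger; the
+ statistics string is only built when the log level is enabled:
+
+   LOG.debug("stream statistics: {}",
+       IOStatisticsLogging.demandStringifyIOStatisticsSource(in));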
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It is serializable so that frameworks which can use java serialization
+ to propagate data (Spark, Flink...) can send the statistics
+ back. For this reason, TreeMaps are explicitly used as field types,
+ even though IDEs can recommend use of Map instead.
+ For security reasons, untrusted java object streams should never be
+ deserialized. If for some reason this is required, use
+ {@link #requiredSerializationClasses()} to get the list of classes
+ used when deserializing instances of this object.
+
+
+ It is annotated for correct serialization with jackson2.
+
]]>
+
+
+
+
+
+
+
+
+
+ This is not an atomic operation.
+
+ The instance can be serialized, and its
+ {@code toString()} method lists all the values.
+ @param statistics statistics
+ @return a snapshot of the current values.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It can be used to accrue values so as to dynamically update
+ the mean. If so, know that there is no synchronization
+ on the methods.
+
+
+ If a statistic has 0 samples then it is considered to be empty.
+
+
+ All 'empty' statistics are equivalent, independent of the sum value.
+
+
+ For non-empty statistics, sum and sample values must match
+ for equality.
+
+
+ It is serializable and annotated for correct serialization with jackson2.
+
+
+ Thread safety. The operations to add/copy sample data are thread safe.
+
+
+
{@link #add(MeanStatistic)}
+
{@link #addSample(long)}
+
{@link #clear()}
+
{@link #setSamplesAndSum(long, long)}
+
{@link #set(MeanStatistic)}
+
{@link #setSamples(long)} and {@link #setSum(long)}
+
+
+ So is the {@link #mean()} method. This ensures that when
+ used to aggregate statistics, the aggregate value and sample
+ count are set and evaluated consistently.
+
+
+ Other methods are marked as synchronized because Findbugs overreacts
+ to the idea that some operations to update sum and sample count
+ are synchronized, but that things like equals are not.
+
+ The names of the constants are uppercase, words separated by
+ underscores.
+
+
+ The values of the constants are the lowercase form of the constant names.
+
+
]]>
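+
+ A small sketch of accruing samples and reading the mean; the sample values are
+ arbitrary:
+
+   MeanStatistic mean = new MeanStatistic();
+   mean.addSample(120);
+   mean.addSample(80);
+   double value = mean.mean();   // samples = 2, sum = 200, mean = 100.0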
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Since these methods are often vendor- or device-specific, operators
+ may implement this interface in order to achieve fencing.
+
+ Fencing is configured by the operator as an ordered list of methods to
+ attempt. Each method will be tried in turn, and the next in the list
+ will only be attempted if the previous one fails. See {@link NodeFencer}
+ for more information.
+
+ If an implementation also implements {@link Configurable} then its
+ setConf method will be called upon instantiation.]]>
+
StaticUserWebFilter - An authorization plugin that makes all
+users a static configured user.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ public class IntArrayWritable extends ArrayWritable {
+ public IntArrayWritable() {
+ super(IntWritable.class);
+ }
+ }
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ByteWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to store
+ @param item the object to be stored
+ @param keyName the name of the key to use
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param items the objects to be stored
+ @param keyName the name of the key to use
+ @throws IndexOutOfBoundsException if the items array is empty
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+ DefaultStringifier offers convenience methods to store/load objects to/from
+ the configuration.
+
+ @param the class of the objects to stringify]]>
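+
+ A small sketch of the store/load convenience methods described above,
+ assuming a Text value and a hypothetical configuration key name:
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.io.DefaultStringifier;
+ import org.apache.hadoop.io.Text;
+
+ public class StringifierExample {
+   public static void main(String[] args) throws Exception {
+     Configuration conf = new Configuration();
+     // Serialize the object and store it under a configuration key.
+     DefaultStringifier.store(conf, new Text("hello"), "my.stored.text");
+     // Later (for example in a task), restore it from the same key.
+     Text restored = DefaultStringifier.load(conf, "my.stored.text", Text.class);
+     System.out.println(restored);   // hello
+   }
+ }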
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a DoubleWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value argument is null or
+ its size is zero, the elementType argument must not be null. If
+ the argument value's size is bigger than zero, the argument
+ elementType is not used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+ value should not be null
+ or empty.
+
+ @param value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value and elementType. If the value argument
+ is null or its size is zero, the elementType argument must not be
+ null. If the argument value's size is bigger than zero, the
+ argument elementType is not used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is an EnumSetWritable with the same value,
+ or both are null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a FloatWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
When two sequence files, which have the same Key type but different Value
+ types, are mapped out to reduce, multiple Value types are not allowed.
+ In this case, this class can help you wrap instances with different types.
+
+
+
+ Compared with ObjectWritable, this class is much more efficient,
+ because ObjectWritable will append the class declaration as a String
+ into the output file in every Key-Value pair.
+
+
+
+ Generic Writable implements {@link Configurable} interface, so that it will be
+ configured by the framework. The configuration is passed to the wrapped objects
+ implementing {@link Configurable} interface before deserialization.
+
+
+ How to use it:
+ 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines
+ the classes that will be wrapped in GenericObject in the application.
+ Note: the classes defined in the getTypes() method must
+ implement the Writable interface.
+
+
+ @since Nov 8, 2006]]>
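+
+ A sketch of steps 1 and 2 above; the wrapped types shown here are
+ illustrative application choices, not required ones:
+
+ import org.apache.hadoop.io.GenericWritable;
+ import org.apache.hadoop.io.IntWritable;
+ import org.apache.hadoop.io.Text;
+ import org.apache.hadoop.io.Writable;
+
+ public class GenericObject extends GenericWritable {
+   // The concrete Writable types this wrapper may carry.
+   @SuppressWarnings("unchecked")
+   private static final Class<? extends Writable>[] TYPES =
+       new Class[] { IntWritable.class, Text.class };
+
+   @Override
+   protected Class<? extends Writable>[] getTypes() {
+     return TYPES;
+   }
+ }
+
+ An instance is then used by calling set() with one of the declared types
+ before serialization, and get() after deserialization.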
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a IntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ closes the input and output streams
+ at the end.
+
+ @param in InputStream to read from
+ @param out OutputStream to write to
+ @param conf the Configuration object]]>
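+
+ A sketch of typical usage with HDFS streams; the paths are hypothetical:
+
+ import java.io.InputStream;
+ import java.io.OutputStream;
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.FileSystem;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.io.IOUtils;
+
+ public class CopyBytesExample {
+   public static void main(String[] args) throws Exception {
+     Configuration conf = new Configuration();
+     FileSystem fs = FileSystem.get(conf);
+     InputStream in = fs.open(new Path("/tmp/in.txt"));
+     OutputStream out = fs.create(new Path("/tmp/out.txt"));
+     // Copies using the configured io.file.buffer.size and closes both streams.
+     IOUtils.copyBytes(in, out, conf);
+   }
+ }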
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param log the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close
+ @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)}
+ instead]]>
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param logger the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is better than File#listDir because it does not ignore IOExceptions.
+
+ @param dir The directory to list.
+ @param filter If non-null, the filter to use when listing
+ this directory.
+ @return The list of files in the directory.
+
+ @throws IOException On I/O error]]>
+
+
+
+
+
+
+
+ Borrowed from Uwe Schindler in LUCENE-5588
+ @param fileToSync the file to fsync]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a LongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A map is a directory containing two files, the data file,
+ containing all keys and values in the map, and a smaller index
+ file, containing a fraction of the keys. The fraction is determined by
+ {@link Writer#getIndexInterval()}.
+
+
The index file is read entirely into memory. Thus key implementations
+ should try to keep themselves small.
+
+
Map files are created by adding entries in-order. To maintain a large
+ database, perform updates by copying the previous version of a database and
+ merging in a sorted change list, to create a new version of the database in
+ a new file. Sorting large change lists can be done with {@link
+ SequenceFile.Sorter}.]]>
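+
+ A sketch of writing and reading a map file, assuming the option-based
+ Writer/Reader constructors; the directory path is hypothetical:
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.io.IntWritable;
+ import org.apache.hadoop.io.MapFile;
+ import org.apache.hadoop.io.Text;
+
+ public class MapFileExample {
+   public static void main(String[] args) throws Exception {
+     Configuration conf = new Configuration();
+     Path dir = new Path("/tmp/example.map");
+
+     // Entries must be appended in sorted key order.
+     try (MapFile.Writer writer = new MapFile.Writer(conf, dir,
+         MapFile.Writer.keyClass(IntWritable.class),
+         MapFile.Writer.valueClass(Text.class))) {
+       for (int i = 0; i < 100; i++) {
+         writer.append(new IntWritable(i), new Text("value-" + i));
+       }
+     }
+
+     // Random lookup by key uses the in-memory index.
+     try (MapFile.Reader reader = new MapFile.Reader(dir, conf)) {
+       Text value = new Text();
+       reader.get(new IntWritable(42), value);
+       System.out.println(value);   // value-42
+     }
+   }
+ }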
+
SequenceFile provides {@link SequenceFile.Writer},
+ {@link SequenceFile.Reader} and {@link Sorter} classes for writing,
+ reading and sorting respectively.
+
+ There are three SequenceFileWriters based on the
+ {@link CompressionType} used to compress key/value pairs:
+
+
+ Writer : Uncompressed records.
+
+
+ RecordCompressWriter : Record-compressed files, only compress
+ values.
+
+
+ BlockCompressWriter : Block-compressed files, both keys &
+ values are collected in 'blocks'
+ separately and compressed. The size of
+ the 'block' is configurable.
+
+
+
The actual compression algorithm used to compress key and/or values can be
+ specified by using the appropriate {@link CompressionCodec}.
+
+
The recommended way is to use the static createWriter methods
+ provided by the SequenceFile to choose the preferred format.
+
+
The {@link SequenceFile.Reader} acts as the bridge and can read any of the
+ above SequenceFile formats.
+
+
SequenceFile Formats
+
+
Essentially there are 3 different formats for SequenceFiles
+ depending on the CompressionType specified. All of them share a
+ common header described below.
+
+
SequenceFile Header
+
+
+ version - 3 bytes of magic header SEQ, followed by 1 byte of actual
+ version number (e.g. SEQ4 or SEQ6)
+
+
+ keyClassName -key class
+
+
+ valueClassName - value class
+
+
+ compression - A boolean which specifies if compression is turned on for
+ keys/values in this file.
+
+
+ blockCompression - A boolean which specifies if block-compression is
+ turned on for keys/values in this file.
+
+
+ compression codec - CompressionCodec class which is used for
+ compression of keys and/or values (if compression is
+ enabled).
+
+
+ metadata - {@link Metadata} for this file.
+
+
+ sync - A sync marker to denote end of the header.
+
The compressed blocks of key lengths and value lengths consist of the
+ actual lengths of individual keys/values encoded in ZeroCompressedInteger
+ format.
+
+ @see CompressionCodec]]>
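+
+ A sketch of the recommended createWriter usage and a Reader that handles any
+ of the formats; the file path is hypothetical:
+
+ import org.apache.hadoop.conf.Configuration;
+ import org.apache.hadoop.fs.Path;
+ import org.apache.hadoop.io.IOUtils;
+ import org.apache.hadoop.io.IntWritable;
+ import org.apache.hadoop.io.SequenceFile;
+ import org.apache.hadoop.io.SequenceFile.CompressionType;
+ import org.apache.hadoop.io.Text;
+
+ public class SequenceFileExample {
+   public static void main(String[] args) throws Exception {
+     Configuration conf = new Configuration();
+     Path file = new Path("/tmp/example.seq");
+
+     // Block-compressed writer created through the static factory method.
+     SequenceFile.Writer writer = SequenceFile.createWriter(conf,
+         SequenceFile.Writer.file(file),
+         SequenceFile.Writer.keyClass(IntWritable.class),
+         SequenceFile.Writer.valueClass(Text.class),
+         SequenceFile.Writer.compression(CompressionType.BLOCK));
+     try {
+       for (int i = 0; i < 100; i++) {
+         writer.append(new IntWritable(i), new Text("value-" + i));
+       }
+     } finally {
+       IOUtils.closeStream(writer);
+     }
+
+     // The Reader detects whether the file is uncompressed, record- or
+     // block-compressed and bridges all three formats.
+     SequenceFile.Reader reader = new SequenceFile.Reader(conf,
+         SequenceFile.Reader.file(file));
+     try {
+       IntWritable key = new IntWritable();
+       Text value = new Text();
+       while (reader.next(key, value)) {
+         System.out.println(key + "\t" + value);
+       }
+     } finally {
+       IOUtils.closeStream(reader);
+     }
+   }
+ }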
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ShortWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the objects to stringify]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ position. Note that this
+ method avoids using the converter or doing String instantiation
+ @return the Unicode scalar value at position or -1
+ if the position is invalid or points to a
+ trailing byte]]>
+
+
+
+
+
+
+
+
+
+ what in the backing
+ buffer, starting at position start. The starting
+ position is measured in bytes and the return value is in
+ terms of byte position in the buffer. The backing buffer is
+ not converted to a string for this operation.
+ @return byte position of the first occurrence of the search
+ string in the UTF-8 buffer or -1 if not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Note: For performance reasons, this call does not clear the
+ underlying byte array that is retrievable via {@link #getBytes()}.
+ In order to free the byte-array memory, call {@link #set(byte[])}
+ with an empty byte array (for example, new byte[0]).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a Text with the same contents.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.
+ @return ByteBuffer: bytes stores at ByteBuffer.array()
+ and length is ByteBuffer.limit()]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In
+ addition, it provides methods for string traversal without converting the
+ byte array to a string.
Also includes utilities for
+ serializing/deserializing a string, coding/decoding a string, checking if a
+ byte array contains valid UTF8 code, calculating the length of an encoded
+ string.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is useful when a class may evolve, so that instances written by the
+ old version of the class may still be processed by the new version. To
+ handle this situation, {@link #readFields(DataInput)}
+ implementations should catch {@link VersionMismatchException}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VIntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VLongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ out.
+
+ @param out DataOutput to serialize this object into.
+ @throws IOException]]>
+
+
+
+
+
+
+ in.
+
+
For efficiency, implementations should attempt to re-use storage in the
+ existing object where possible.
+
+ @param in DataInput to deserialize this object from.
+ @throws IOException]]>
+
+
+
+ Any key or value type in the Hadoop Map-Reduce
+ framework implements this interface.
+
+
Implementations typically implement a static read(DataInput)
+ method which constructs a new instance, calls {@link #readFields(DataInput)}
+ and returns the instance.
+
+
Example:
+
+ public class MyWritable implements Writable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ // Default constructor to allow (de)serialization
+ MyWritable() { }
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public static MyWritable read(DataInput in) throws IOException {
+ MyWritable w = new MyWritable();
+ w.readFields(in);
+ return w;
+ }
+ }
+
]]>
+
+
+
+
+
+
+
+
+ WritableComparables can be compared to each other, typically
+ via Comparators. Any type which is to be used as a
+ key in the Hadoop Map-Reduce framework should implement this
+ interface.
+
+
Note that hashCode() is frequently used in Hadoop to partition
+ keys. It's important that your implementation of hashCode() returns the same
+ result across different instances of the JVM. Note also that the default
+ hashCode() implementation in Object does not
+ satisfy this property.
+
+
Example:
+
+ public class MyWritableComparable implements
+ WritableComparable{@literal <MyWritableComparable>} {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public int compareTo(MyWritableComparable o) {
+ int thisValue = this.value;
+ int thatValue = o.value;
+ return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
+ }
+
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + counter;
+ result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
+ return result;
+ }
+ }
+
One may optimize compare-intensive operations by overriding
+ {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are
+ provided to assist in optimized implementations of this method.]]>
+
+ The code alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec object]]>
+
+
+
+
+
+
+ Codec aliases are case insensitive.
+
+ The code alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example, for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec class]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Implementations are assumed to be buffered. This permits clients to
+ reposition the underlying input stream then call {@link #resetState()},
+ without having to also synchronize client buffers.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ #setInput() should be called in order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if the end of the compressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+ (Both native and non-native versions of various Decompressors require
+ that the data passed in via b[] remain unmodified until
+ the caller is explicitly notified--via {@link #needsInput()}--that the
+ buffer may be safely modified. With this requirement, an extra
+ buffer-copy can be avoided.)
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called to
+ provide more input.
+
+ @return true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called in
+ order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ true if a preset dictionary is needed for decompression.
+ @return true if a preset dictionary is needed for decompression]]>
+
+
+
+
+ true if the end of the decompressed
+ data output stream has been reached. Indicates a concatenated data stream
+ when finished() returns true and {@link #getRemaining()}
+ returns a positive value. finished() will be reset with the
+ {@link #reset()} method.
+ @return true if the end of the decompressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true and getRemaining() returns a positive value. If
+ {@link #finished()} returns true and getRemaining() returns
+ a zero value, indicates that the end of data stream has been reached and
+ is not a concatenated data stream.
+ @return The number of bytes remaining in the compressed data buffer.]]>
+
+
+
+
+ true and {@link #getRemaining()} returns a positive value,
+ reset() is called before processing of the next data stream in the
+ concatenated data stream. {@link #finished()} will be reset and will
+ return false when reset() is called.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ io.compression.codecs = org.apache.hadoop.io.compress.PassthroughCodec
+ io.compress.passthrough.extension = .gz
+
+
+ Note: this is not a Splittable codec: it doesn't know the
+ capabilities of the passed in stream. It should be possible to
+ extend this in a subclass: the inner classes are marked as protected
+ to enable this. Do not retrofit splitting to this class.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Seek by key or by file offset.
+
+ The memory footprint of a TFile includes the following:
+
+
Some constant overhead of reading or writing a compressed block.
+
+
Each compressed block requires one compression/decompression codec for
+ I/O.
+
Temporary space to buffer the key.
+
Temporary space to buffer the value (for TFile.Writer only). Values are
+ chunk encoded, so that we buffer at most one chunk of user data. By default,
+ the chunk buffer is 1MB. Reading chunked value does not require additional
+ memory.
+
+
TFile index, which is proportional to the total number of Data Blocks.
+ The total amount of memory needed to hold the index can be estimated as
+ (56+AvgKeySize)*NumBlocks.
+
MetaBlock index, which is proportional to the total number of Meta
+ Blocks. The total amount of memory needed to hold the index for Meta Blocks
+ can be estimated as (40+AvgMetaBlockName)*NumMetaBlock.
+
+
+ The behavior of TFile can be customized by the following variables through
+ Configuration:
+
+
tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default
+ to 1MB. Values whose length is less than the chunk size are guaranteed to have
+ a known value length at read time (See
+ {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}).
+
tfile.fs.output.buffer.size: Buffer size used for
+ FSDataOutputStream. Integer (in bytes). Default to 256KB.
+
tfile.fs.input.buffer.size: Buffer size used for
+ FSDataInputStream. Integer (in bytes). Default to 256KB.
+
+
+ Suggestions on performance optimization.
+
+
Minimum block size. We recommend a setting of minimum block size between
+ 256KB and 1MB for general usage. A larger block size is preferred if files are
+ primarily for sequential access. However, it would lead to inefficient random
+ access (because there is more data to decompress). Smaller blocks are good
+ for random access, but require more memory to hold the block index, and may
+ be slower to create (because we must flush the compressor stream at the
+ conclusion of each data block, which leads to an FS I/O flush). Further, due
+ to the internal caching in Compression codec, the smallest possible block
+ size would be around 20KB-30KB.
+
The current implementation does not offer true multi-threading for
+ reading. The implementation uses FSDataInputStream seek()+read(), which is
+ shown to be much faster than positioned-read call in single thread mode.
+ However, it also means that if multiple threads attempt to access the same
+ TFile (using multiple scanners) simultaneously, the actual I/O is carried out
+ sequentially even if they access different DFS blocks.
+
Compression codec. Use "none" if the data is not very compressible (by
+ compressible, we mean a compression ratio of at least 2:1). Generally, use "lzo"
+ as the starting point for experimenting. "gz" offers a slightly better
+ compression ratio than "lzo" but requires 4x the CPU to compress and 2x the CPU to
+ decompress, compared to "lzo".
+
File system buffering. If the underlying FSDataInputStream and
+ FSDataOutputStream are already adequately buffered, or if applications
+ read/write keys and values in large buffers, we can reduce the sizes of
+ input/output buffering in TFile layer by setting the configuration parameters
+ "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size".
+
+
+ Some design rationale behind TFile can be found at Hadoop-3315.]]>
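+
+ A small sketch of setting the tuning parameters listed above on a
+ Configuration before creating a TFile.Writer or TFile.Reader (the chosen
+ values simply restate the documented defaults):
+
+ import org.apache.hadoop.conf.Configuration;
+
+ public class TFileTuningExample {
+   public static void main(String[] args) {
+     Configuration conf = new Configuration();
+     conf.setInt("tfile.io.chunk.size", 1024 * 1024);         // 1MB value chunks
+     conf.setInt("tfile.fs.output.buffer.size", 256 * 1024);  // FSDataOutputStream buffer
+     conf.setInt("tfile.fs.input.buffer.size", 256 * 1024);   // FSDataInputStream buffer
+   }
+ }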
+
+
+
+
+
+
+
+
+
+
+ Utils#writeVLong(out, n).
+
+ @param out
+ output stream
+ @param n
+ The integer to be encoded
+ @throws IOException
+ @see Utils#writeVLong(DataOutput, long)]]>
+
+
+
+
+
+
+
+
+
if n in [-32, 127): encode in one byte with the actual value.
+ Otherwise,
+
if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52;
+ byte[1]=n&0xff. Otherwise,
+
if n in [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 -
+ 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
+
if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112;
+ byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff;
+ byte[3]=n&0xff.
+ Otherwise:
+
if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] =
+ (n>>24)&0xff; byte[2]=(n>>16)&0xff;
+ byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
+
if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] =
+ (n>>32)&0xff; byte[2]=(n>>24)&0xff;
+ byte[3]=(n>>16)&0xff; byte[4]=(n>>8)&0xff;
+ byte[5]=n&0xff
+
if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] =
+ (n>>40)&0xff; byte[2]=(n>>32)&0xff;
+ byte[3]=(n>>24)&0xff; byte[4]=(n>>16)&0xff;
+ byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
+
if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] =
+ (n>>48)&0xff; byte[2] = (n>>40)&0xff;
+ byte[3]=(n>>32)&0xff; byte[4]=(n>>24)&0xff; byte[5]=
+ (n>>16)&0xff; byte[6]=(n>>8)&0xff; byte[7]=n&0xff;
+
if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] =
+ (n>>54)&0xff; byte[2] = (n>>48)&0xff;
+ byte[3] = (n>>40)&0xff; byte[4]=(n>>32)&0xff;
+ byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; byte[7]=
+ (n>>8)&0xff; byte[8]=n&0xff;
+
+
+ @param out
+ output stream
+ @param n
+ the integer number
+ @throws IOException]]>
+
if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
+
if (FB in [-104, -73]), return (FB+88)<<16 +
+ (NB[0]&0xff)<<8 + NB[1]&0xff;
+
if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)
+ <<16 + (NB[1]&0xff)<<8 + NB[2]&0xff;
+
if (FB in [-128, -121]), return interpret NB[FB+129] as a signed
+ big-endian integer.
+
+ @param in
+ input stream
+ @return the decoded long integer.
+ @throws IOException]]>
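+
+ A round-trip sketch of the encoding described above, assuming the public
+ static Utils.writeVLong/readVLong methods of the tfile utility class:
+
+ import java.io.ByteArrayInputStream;
+ import java.io.ByteArrayOutputStream;
+ import java.io.DataInputStream;
+ import java.io.DataOutputStream;
+ import org.apache.hadoop.io.file.tfile.Utils;
+
+ public class VLongExample {
+   public static void main(String[] args) throws Exception {
+     ByteArrayOutputStream bytes = new ByteArrayOutputStream();
+     DataOutputStream out = new DataOutputStream(bytes);
+     Utils.writeVLong(out, 100L);           // small value: one byte
+     Utils.writeVLong(out, 1000000000L);    // larger value: several bytes
+     out.close();
+
+     DataInputStream in =
+         new DataInputStream(new ByteArrayInputStream(bytes.toByteArray()));
+     System.out.println(Utils.readVLong(in));   // 100
+     System.out.println(Utils.readVLong(in));   // 1000000000
+   }
+ }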
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ An experimental {@link Serialization} for Java {@link Serializable} classes.
+
+ @see JavaSerializationComparator]]>
+
+
+
+
+
+
+
+
+
+
+ A {@link RawComparator} that uses a {@link JavaSerialization}
+ {@link Deserializer} to deserialize objects that are then compared via
+ their {@link Comparable} interfaces.
+
+ @param
+ @see JavaSerialization]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides a mechanism for using different serialization frameworks
+in Hadoop. The property "io.serializations" defines a list of
+{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create
+{@link org.apache.hadoop.io.serializer.Serializer}s and
+{@link org.apache.hadoop.io.serializer.Deserializer}s.
+
+
+
+To add a new serialization framework write an implementation of
+{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the
+"io.serializations" property.
+
]]>
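+
+A sketch of registering an extra framework and obtaining a Serializer through
+the SerializationFactory (the explicit class list below is illustrative):
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.io.serializer.SerializationFactory;
+import org.apache.hadoop.io.serializer.Serializer;
+
+public class SerializationExample {
+  public static void main(String[] args) {
+    Configuration conf = new Configuration();
+    // Add JavaSerialization so java.io.Serializable types (such as String)
+    // can be handled alongside the default WritableSerialization.
+    conf.setStrings("io.serializations",
+        "org.apache.hadoop.io.serializer.WritableSerialization",
+        "org.apache.hadoop.io.serializer.JavaSerialization");
+
+    SerializationFactory factory = new SerializationFactory(conf);
+    Serializer<String> serializer = factory.getSerializer(String.class);
+    System.out.println(serializer != null);   // true: a framework accepts String
+  }
+}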
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ avro.reflect.pkgs or implement
+ {@link AvroReflectSerializable} interface.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides Avro serialization in Hadoop. This can be used to
+serialize/deserialize Avro types in Hadoop.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for
+serialization of classes generated by Avro's 'specific' compiler.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for
+other classes.
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} works for
+any class which is either in the package list configured via
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES}
+or implements the {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable}
+interface.
+
{@link MetricsSource}s generate and update metrics information.
+
{@link MetricsSink}s consume the metrics information.
+
+
+ {@link MetricsSource} and {@link MetricsSink} register with the metrics
+ system. Implementations of {@link MetricsSystem} poll the
+ {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to
+ the {@link MetricsSink}s.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } (aggregate).
+ Filter out entries that don't have at least minSamples.
+
+ @return a map of peer DataNode Id to the average latency to that
+ node seen over the measurement period.]]>
+
+
+
+
+
+
+
+
+
+
+ This class maintains a group of rolling average metrics. It implements the
+ algorithm of rolling average, i.e. a number of sliding windows are kept to
+ roll over and evict old subsets of samples. Each window has a subset of
+ samples in a stream, where a sub-sum and sub-total are collected. All sub-sums
+ and sub-totals in all windows are aggregated into a final sum and final total,
+ which are used to compute the final average, known as the rolling average.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This class is a metrics sink that uses
+ {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every
+ roll interval a new directory will be created under the path specified by the
+ basepath property. All metrics will be logged to a file in the
+ current interval's directory in a file named <hostname>.log, where
+ <hostname> is the name of the host on which the metrics logging
+ process is running. The base path is set by the
+ <prefix>.sink.<instance>.basepath property. The
+ time zone used to create the current interval's directory name is GMT. If
+ the basepath property isn't specified, it will default to
+ "/tmp", which is the temp directory on whatever default file
+ system is configured for the cluster.
+
+
The <prefix>.sink.<instance>.ignore-error
+ property controls whether an exception is thrown when an error is encountered
+ writing a log file. The default value is true. When set to
+ false, file errors are quietly swallowed.
+
+
The roll-interval property sets the amount of time before
+ rolling the directory. The default value is 1 hour. The roll interval may
+ not be less than 1 minute. The property's value should be given as
+ number unit, where number is an integer value, and
+ unit is a valid unit. Valid units are minute, hour,
+ and day. The units are case insensitive and may be abbreviated or
+ plural. If no units are specified, hours are assumed. For example,
+ "2", "2h", "2 hour", and
+ "2 hours" are all valid ways to specify two hours.
+
+
The roll-offset-interval-millis property sets the upper
+ bound on a random time interval (in milliseconds) that is used to delay
+ before the initial roll. All subsequent rolls will happen an integer
+ number of roll intervals after the initial roll, hence retaining the original
+ offset. The purpose of this property is to insert some variance in the roll
+ times so that large clusters using this sink on every node don't cause a
+ performance impact on HDFS by rolling simultaneously. The default value is
+ 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in
+ millis should be no less than the number of sink instances times 5.
+
+
The primary use of this class is for logging to HDFS. As it uses
+ {@link org.apache.hadoop.fs.FileSystem} to access the target file system,
+ however, it can be used to write to the local file system, Amazon S3, or any
+ other supported file system. The base path for the sink will determine the
+ file system used. An unqualified path will write to the default file system
+ set by the configuration.
+
+
Not all file systems support the ability to append to files. In file
+ systems without the ability to append to files, only one writer can write to
+ a file at a time. To allow for concurrent writes from multiple daemons on a
+ single host, the source property is used to set unique headers
+ for the log files. The property should be set to the name of
+ the source daemon, e.g. namenode. The value of the
+ source property should typically be the same as the property's
+ prefix. If this property is not set, the source is taken to be
+ unknown.
+
+
Instead of appending to an existing file, by default the sink
+ will create a new file with a suffix of ".<n>", where
+ n is the next lowest integer that isn't already used in a file name,
+ similar to the Hadoop daemon logs. NOTE: the file with the highest
+ sequence number is the newest file, unlike the Hadoop daemon logs.
+
+
For file systems that allow append, the sink supports appending to the
+ existing file instead. If the allow-append property is set to
+ true, the sink will instead append to the existing file on file systems that
+ support appends. By default, the allow-append property is
+ false.
+
+
Note that when writing to HDFS with allow-append set to true,
+ there is a minimum acceptable number of data nodes. If the number of data
+ nodes drops below that minimum, the append will succeed, but reading the
+ data will fail with an IOException in the DataStreamer class. The minimum
+ number of data nodes required for a successful append is generally 2 or
+ 3.
+
+
Note also that when writing to HDFS, the file size information is not
+ updated until the file is closed (at the end of the interval) even though
+ the data is being written successfully. This is a known HDFS limitation that
+ exists because of the performance cost of updating the metadata. See
+ HDFS-5478.
+
+
When using this sink in a secure (Kerberos) environment, two additional
+ properties must be set: keytab-key and
+ principal-key. keytab-key should contain the key by
+ which the keytab file can be found in the configuration, for example,
+ yarn.nodemanager.keytab. principal-key should
+ contain the key by which the principal can be found in the configuration,
+ for example, yarn.nodemanager.principal.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CollectD StatsD plugin).
+
+ To configure this plugin, you will need to add the following
+ entries to your hadoop-metrics2.properties file:
+
+
+ *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
+ [prefix].sink.statsd.server.host=
+ [prefix].sink.statsd.server.port=
+ [prefix].sink.statsd.skip.hostname=true|false (optional)
+ [prefix].sink.statsd.service.name=NameNode (name you want for service)
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Register the MBean using the standard naming convention
+ {@literal "hadoop:service=<serviceName>,name=<nameName>"},
+ where {@literal <serviceName>} and {@literal <nameName>} are the supplied
+ parameters.
+
+ @param serviceName
+ @param nameName
+ @param theMbean - the MBean to register
+ @return the name used to register the MBean]]>
+
+
+
+
+
+
+
+
+ Register the MBean using the standard naming convention
+ {@literal "hadoop:service=<serviceName>,name=<nameName>"},
+ where {@literal <serviceName>} and {@literal <nameName>} are the supplied
+ parameters.
+
+ @param serviceName
+ @param nameName
+ @param properties - Key value pairs to define additional JMX ObjectName
+ properties.
+ @param theMbean - the MBean to register
+ @return the name used to register the MBean]]>
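+
+ A sketch of the basic register overload; the MXBean interface and the
+ service/name strings here are hypothetical:
+
+ import javax.management.ObjectName;
+ import org.apache.hadoop.metrics2.util.MBeans;
+
+ public class MBeansExample {
+   public interface ExampleInfoMXBean {
+     int getActiveCount();
+   }
+
+   static class ExampleInfo implements ExampleInfoMXBean {
+     @Override
+     public int getActiveCount() {
+       return 1;
+     }
+   }
+
+   public static void main(String[] args) {
+     // Registered under the standard Hadoop service/name convention.
+     ObjectName name = MBeans.register("ExampleService", "ExampleInfo",
+         new ExampleInfo());
+     System.out.println(name);
+   }
+ }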
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostname or hostname:port. If
+ the specs string is null, defaults to localhost:defaultPort.
+
+ @param specs server specs (see description)
+ @param defaultPort the default port if not specified
+ @return a list of InetSocketAddress objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is used when parts of Hadoop need to know whether to apply
+ single rack vs multi-rack policies, such as during block placement.
+ Such algorithms behave differently if they are on multi-switch systems.
+
+
+ @return true if the mapping thinks that it is on a single switch]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This predicate simply assumes that all mappings not derived from
+ this class are multi-switch.
+ @param mapping the mapping to query
+ @return true if the base class says it is single switch, or the mapping
+ is not derived from this class.]]>
+
+
+
+ It is not mandatory to
+ derive {@link DNSToSwitchMapping} implementations from it, but it is strongly
+ recommended, as it makes it easy for the Hadoop developers to add new methods
+ to this base class that are automatically picked up by all implementations.
+
+
+ This class does not extend the Configured
+ base class, and should not be changed to do so, as it causes problems
+ for subclasses. The constructor of the Configured calls
+ the {@link #setConf(Configuration)} method, which will call into the
+ subclasses before they have been fully constructed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ If a name cannot be resolved to a rack, the implementation
+ should return {@link NetworkTopology#DEFAULT_RACK}. This
+ is what the bundled implementations do, though it is not a formal requirement
+
+ @param names the list of hosts to resolve (can be empty)
+ @return list of resolved network paths.
+ If names is empty, the returned list is also empty]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Calling {@link #setConf(Configuration)} will trigger a
+ re-evaluation of the configuration settings and so be used to
+ set up the mapping script.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will get called in the superclass constructor, so a check is needed
+ to ensure that the raw mapping is defined before trying to relay a null
+ configuration.
+ @param conf]]>
+
+
+
+
+
+
+
+
+
+ It contains a static class RawScriptBasedMapping that performs
+ the work: reading the configuration parameters, executing any defined
+ script, handling errors and such like. The outer
+ class extends {@link CachedDNSToSwitchMapping} to cache the delegated
+ queries.
+
+ This DNS mapper's {@link #isSingleSwitch()} predicate returns
+ true if and only if a script is defined.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text
+ file. The columns are separated by whitespace. The first column is a DNS or
+ IP address and the second column specifies the rack where the address maps.
+
+
+ This class uses the configuration parameter {@code
+ net.topology.table.file.name} to locate the mapping file.
+
+
+ Calls to {@link #resolve(List)} will look up the address as defined in the
+ mapping file. If no entry corresponding to the address is found, the value
+ {@code /default-rack} is returned.
+
+ An instance of the default {@link DelegationTokenAuthenticator} will be
+ used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL using the default
+ {@link DelegationTokenAuthenticator} class.
+
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+
+
+
+
+
+
+
+ The default class is {@link KerberosDelegationTokenAuthenticator}
+
+ @return the delegation token authenticator class to use as default.]]>
+
+
+
+
+
+
+ This method is provided to enable WebHDFS backwards compatibility.
+
+ @param useQueryString TRUE if the token is transmitted in the
+ URL query string, FALSE if the delegation token is transmitted
+ using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP
+ header.]]>
+
+
+
+
+ TRUE if the token is transmitted in the URL query
+ string, FALSE if the delegation token is transmitted using the
+ {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator. If the doAs parameter is not NULL,
+ the request will be done on behalf of the specified doAs user.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @param doAs user to do the request on behalf of, if NULL the request is
+ as self.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+ DelegationTokenAuthenticatedURL is a
+ {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token
+ functionality.
+
+ The authentication mechanisms supported by default are Hadoop Simple
+ authentication (also known as pseudo authentication) and Kerberos SPNEGO
+ authentication.
+
+ Additional authentication mechanisms can be supported via {@link
+ DelegationTokenAuthenticator} implementations.
+
+ The default {@link DelegationTokenAuthenticator} is the {@link
+ KerberosDelegationTokenAuthenticator} class which supports
+ automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via
+ the {@link PseudoDelegationTokenAuthenticator} class.
+
+ AuthenticatedURL instances are not thread-safe.]]>
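+
+ A sketch of opening an authenticated connection; the endpoint URL is
+ hypothetical and the Token instance is reused across calls:
+
+ import java.net.HttpURLConnection;
+ import java.net.URL;
+ import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticatedURL;
+
+ public class DelegationTokenAuthExample {
+   public static void main(String[] args) throws Exception {
+     URL url = new URL("http://host:8088/ws/v1/example");
+     DelegationTokenAuthenticatedURL.Token token =
+         new DelegationTokenAuthenticatedURL.Token();
+     DelegationTokenAuthenticatedURL aUrl = new DelegationTokenAuthenticatedURL();
+
+     // Authenticates (Kerberos SPNEGO, falling back to simple auth) and caches
+     // any negotiated credentials in the token for subsequent requests.
+     HttpURLConnection conn = aUrl.openConnection(url, token);
+     System.out.println(conn.getResponseCode());
+   }
+ }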
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KerberosDelegationTokenAuthenticator provides support for
+ Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation
+ Token operations.
+
+ It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP
+ endpoint does not trigger a SPNEGO authentication.]]>
+
+
+
+
+
+
+
+
+ PseudoDelegationTokenAuthenticator provides support for
+ Hadoop's pseudo authentication mechanism that accepts
+ the user name specified as a query string parameter and support for Hadoop
+ Delegation Token operations.
+
+ This mimics the model of Hadoop Simple authentication trusting the
+ {@link UserGroupInformation#getCurrentUser()} value.]]>
+
Any long-lived operation here will prevent the service state
+ change from completing in a timely manner.
+
If another thread is somehow invoked from the listener, and
+ that thread invokes the methods of the service (including
+ subclass-specific methods), there is a risk of a deadlock.
+
+
+
+ @param service the service that has changed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The base implementation logs all arguments at the debug level,
+ then returns the passed in config unchanged.]]>
+
+
+
+
+
+
+ The action is to signal success by returning the exit code 0.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is called before {@link #init(Configuration)};
+ Any non-null configuration that is returned from this operation
+ becomes the one that is passed on to that {@link #init(Configuration)}
+ operation.
+
+ This permits implementations to change the configuration before
+ the init operation. As the ServiceLauncher only creates
+ an instance of the base {@link Configuration} class, it is
+ recommended to instantiate any subclass (such as YarnConfiguration)
+ that injects new resources.
+
+ @param config the initial configuration build up by the
+ service launcher.
+ @param args list of arguments passed to the command line
+ after any launcher-specific commands have been stripped.
+ @return the configuration to init the service with.
+ Recommended: pass down the config parameter with any changes
+ @throws Exception any problem]]>
+
+
+
+
+
+
+ The return value becomes the exit code of the launched process.
+
+ If an exception is raised, the policy is:
+
+
Any subset of {@link org.apache.hadoop.util.ExitUtil.ExitException}:
+ the exception is passed up unmodified.
+
+
Any exception which implements
+ {@link org.apache.hadoop.util.ExitCodeProvider}:
+ A new {@link ServiceLaunchException} is created with the exit code
+ and message of the thrown exception; the thrown exception becomes the
+ cause.
+
Any other exception: a new {@link ServiceLaunchException} is created
+ with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and
+ the message of the original exception (which becomes the cause).
+
+ @return the exit code
+ @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed
+ up as the exit code and error text.
+ @throws Exception any exception to report. If it provides an exit code
+ this is used in a wrapping exception.]]>
+
+
+
+
+ The command line options will be passed down before the
+ {@link Service#init(Configuration)} operation is invoked via an
+ invocation of {@link LaunchableService#bindArgs(Configuration, List)}
+ After the service has been successfully started via {@link Service#start()}
+ the {@link LaunchableService#execute()} method is called to execute the
+ service. When this method returns, the service launcher will exit, using
+ the return code from the method as its exit code.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 400 Bad Request}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 401 Unauthorized}]]>
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 403: Forbidden}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 404: Not Found}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 405: Not allowed}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 406: Not Acceptable}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 408: Request Timeout}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 409: Conflict}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 500 Internal Server Error}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 501: Not Implemented}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 503 Service Unavailable}]]>
+
+
+
+
+
+ If raised, this is expected to be raised server-side and likely due
+ to client/server version incompatibilities.
+
+ Approximate HTTP equivalent: {@code 505: Version Not Supported}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Codes with a YARN prefix are YARN-related.
+
+ Many of the exit codes are designed to resemble HTTP error codes,
+ squashed into a single byte, e.g. 44, "not found" is the equivalent
+ of 404. The various 2XX HTTP error codes aren't followed;
+ the Unix standard of "0" for success is used.
+
+ 0-10: general command issues
+ 30-39: equivalent to the 3XX responses, where those responses are
+ considered errors by the application.
+ 40-49: client-side/CLI/config problems
+ 50-59: service-side problems.
+ 60+ : application specific error codes
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This uses {@link String#format(String, Object...)}
+ to build the formatted exception in the ENGLISH locale.
+
+ If the last argument is a throwable, it becomes the cause of the exception.
+ It will also be used as a parameter for the format.
+ @param exitCode exit code
+ @param format format for message to use in exception
+ @param args list of arguments]]>
+
+
+
+
+
+ This uses {@link String#format(String, Object...)}
+ to build the formatted exception in the ENGLISH locale.
+ @param exitCode exit code
+ @param cause inner cause
+ @param format format for message to use in exception
+ @param args list of arguments]]>
+
+
+
+
+ When caught by the ServiceLauncher, it will convert that
+ into a process exit code.
+
+ The {@link #ServiceLaunchException(int, String, Object...)} constructor
+ generates formatted exceptions.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will be 0 until a call
+ to {@link #finished()} has been made.
+ @return the currently recorded duration.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Clients and/or applications can use the provided Progressable
+ to explicitly report progress to the Hadoop framework. This is especially
+ important for operations which take a significant amount of time since,
+ in lieu of the reported progress, the framework has to assume that an error
+ has occurred and time out the operation.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Class is to be obtained
+ @return the correctly typed Class of the given object.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ kill -0 command or equivalent]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param parent File parent directory
+ @param basename String script file basename
+ @return File referencing the script in the directory]]>
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param basename String script file basename
+ @return String script file name]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ IOException.
+ @return the path to {@link #WINUTILS_EXE}
+ @throws RuntimeException if the path is not resolvable]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell.
+ @return the thread that ran runCommand() that spawned this shell
+ or null if no thread is waiting for this shell to complete]]>
+
+
+
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param cmd shell command to execute.
+ @return the output of the executed command.]]>
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @param timeout time in milliseconds after which the script should be marked as timed out
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+ Shell processes.
+ Iterates through a map of all currently running Shell
+ processes and destroys them one by one. This method is thread safe.]]>
+
+
+
+
+ Shell objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CreateProcess synchronization object.]]>
+
+
+
+
+ os.name property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Important: caller must check for this value being null.
+ The lack of such checks has led to many support issues being raised.
+
+ @deprecated use one of the exception-raising getter methods,
+ specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell can be used to run shell commands like du or
+ df. It also offers facilities to gate commands by
+ time-intervals.]]>
+
+
+
+
+
+
+
+ ShutdownHookManager singleton.
+
+ @return ShutdownHookManager singleton.]]>
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook.]]>
+
+
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook
+ @param timeout timeout of the shutdownHook
+ @param unit unit of the timeout TimeUnit]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ShutdownHookManager enables running shutdownHook
+ in a deterministic order, higher priority first.
+
+ The JVM runs ShutdownHooks in a non-deterministic order or in parallel.
+ This class registers a single JVM shutdownHook and run all the
+ shutdownHooks registered to it (to this class) in order based on their
+ priority.
+
+ Unless a hook was registered with a shutdown timeout explicitly set through
+ {@link #addShutdownHook(Runnable, int, long, TimeUnit)},
+ the shutdown time allocated to it is set by the configuration option
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in
+ {@code core-site.xml}, with a default value of
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT}
+ seconds.]]>
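+
+ A minimal sketch of registering prioritized hooks, assuming the
+ ShutdownHookManager.get() singleton and the addShutdownHook overloads listed
+ above; closeMetrics() and flushCaches() are hypothetical application methods:
+
+   import java.util.concurrent.TimeUnit;
+   import org.apache.hadoop.util.ShutdownHookManager;
+
+   ShutdownHookManager mgr = ShutdownHookManager.get();
+   // Higher priority hooks run first during shutdown.
+   mgr.addShutdownHook(() -> closeMetrics(), 10);
+   // Explicit per-hook timeout, overriding the core-site.xml default.
+   mgr.addShutdownHook(() -> flushCaches(), 20, 30, TimeUnit.SECONDS);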
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Tool by {@link Tool#run(String[])}, after
+ parsing with the given generic arguments. Uses the given
+ Configuration, or builds one if null.
+
+ Sets the Tool's configuration with the possibly modified
+ version of the conf.
+
+ @param conf Configuration for the Tool.
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+ Tool with its Configuration.
+
+ Equivalent to run(tool.getConf(), tool, args).
+
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ToolRunner can be used to run classes implementing
+ Tool interface. It works in conjunction with
+ {@link GenericOptionsParser} to parse the
+ generic hadoop command line arguments and modifies the
+ Configuration of the Tool. The
+ application-specific options are passed along without being modified.
+
+
+ @see Tool
+ @see GenericOptionsParser]]>
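+
+ A minimal sketch of a Tool launched through ToolRunner, so that -D, -conf,
+ -fs and the other generic options are applied to the Configuration before
+ run() is invoked (MyTool is a hypothetical class name):
+
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.conf.Configured;
+   import org.apache.hadoop.util.Tool;
+   import org.apache.hadoop.util.ToolRunner;
+
+   public class MyTool extends Configured implements Tool {
+     @Override
+     public int run(String[] args) throws Exception {
+       // getConf() already reflects any generic options from the command line.
+       Configuration conf = getConf();
+       System.out.println("application-specific args: " + args.length);
+       return 0;
+     }
+
+     public static void main(String[] args) throws Exception {
+       System.exit(ToolRunner.run(new Configuration(), new MyTool(), args));
+     }
+   }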
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Bloom filter, as defined by Bloom in 1970.
+
+ The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
+ the networking research community in the past decade thanks to the bandwidth efficiencies that it
+ offers for the transmission of set membership information between networked hosts. A sender encodes
+ the information into a bit vector, the Bloom filter, that is more compact than a conventional
+ representation. Computation and space costs for construction are linear in the number of elements.
+ The receiver uses the filter to test whether various elements are members of the set. Though the
+ filter will occasionally return a false positive, it will never return a false negative. When creating
+ the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
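+
+ A minimal sketch using org.apache.hadoop.util.bloom.BloomFilter; the vector
+ size and number of hash functions below are arbitrary illustrative values:
+
+   import java.nio.charset.StandardCharsets;
+   import org.apache.hadoop.util.bloom.BloomFilter;
+   import org.apache.hadoop.util.bloom.Key;
+   import org.apache.hadoop.util.hash.Hash;
+
+   // 1024-bit vector, 3 hash functions, Murmur hashing.
+   BloomFilter filter = new BloomFilter(1024, 3, Hash.MURMUR_HASH);
+   filter.add(new Key("alice".getBytes(StandardCharsets.UTF_8)));
+
+   // May return a false positive, but never a false negative.
+   boolean maybePresent =
+       filter.membershipTest(new Key("alice".getBytes(StandardCharsets.UTF_8)));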
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this counting Bloom filter.
+
+ Invariant: nothing happens if the specified key does not belong to this counting Bloom filter.
+ @param key The key to remove.]]>
+
+
+
+
+
+
+
+
+
+
+
+ key -> count map.
+
+ NOTE: due to the bucket size of this filter, inserting the same
+ key more than 15 times will cause an overflow at all filter positions
+ associated with this key, and it will significantly increase the error
+ rate for this and other keys. For this reason the filter can only be
+ used to store small count values 0 <= N << 15.
+ @param key key to be tested
+ @return 0 if the key is not present. Otherwise, a positive value v will
+ be returned such that v == count with probability equal to the
+ error rate of this filter, and v > count otherwise.
+ Additionally, if the filter experienced an underflow as a result of
+ {@link #delete(Key)} operation, the return value may be lower than the
+ count with the probability of the false negative rate of such
+ filter.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ counting Bloom filter, as defined by Fan et al. in a ToN
+ 2000 paper.
+
+ A counting Bloom filter is an improvement to a standard Bloom filter as it
+ allows dynamic additions and deletions of set membership information. This
+ is achieved through the use of a counting vector instead of a bit vector.
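+
+ A minimal sketch of the add/delete/approximateCount cycle, assuming the
+ org.apache.hadoop.util.bloom.CountingBloomFilter class with illustrative sizing:
+
+   import java.nio.charset.StandardCharsets;
+   import org.apache.hadoop.util.bloom.CountingBloomFilter;
+   import org.apache.hadoop.util.bloom.Key;
+   import org.apache.hadoop.util.hash.Hash;
+
+   CountingBloomFilter filter =
+       new CountingBloomFilter(1024, 3, Hash.MURMUR_HASH);
+   Key key = new Key("alice".getBytes(StandardCharsets.UTF_8));
+
+   filter.add(key);                          // increments the counters for this key
+   int count = filter.approximateCount(key); // expected to be >= 1 here
+   filter.delete(key);                       // decrements; membership is removed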
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Builds an empty Dynamic Bloom filter.
+ @param vectorSize The number of bits in the vector.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).
+ @param nr The threshold for the maximum number of keys to record in a
+ dynamic Bloom filter row.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dynamic Bloom filter, as defined in the INFOCOM 2006 paper.
+
+ A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but
+ each of the s rows is a standard Bloom filter. The creation
+ process of a DBF is iterative. At the start, the DBF is a 1 * m
+ bit matrix, i.e., it is composed of a single standard Bloom filter.
+ It assumes that nr elements are recorded in the
+ initial bit vector, where nr {@literal <=} n
+ (n is the cardinality of the set A to record in
+ the filter).
+
+ As the size of A grows during the execution of the application,
+ several keys must be inserted in the DBF. When inserting a key into the DBF,
+ one must first get an active Bloom filter in the matrix. A Bloom filter is
+ active when the number of recorded keys, nr, is
+ strictly less than the current cardinality of A, n.
+ If an active Bloom filter is found, the key is inserted and
+ nr is incremented by one. On the other hand, if there
+ is no active Bloom filter, a new one is created (i.e., a new row is added to
+ the matrix) according to the current size of A and the element
+ is added in this new Bloom filter and the nr value of
+ this new Bloom filter is set to one. A given key is said to belong to the
+ DBF if the k positions are set to one in one of the matrix rows.
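+
+ A minimal sketch, assuming the org.apache.hadoop.util.bloom.DynamicBloomFilter
+ constructor described above; the row size and nr threshold are illustrative:
+
+   import java.nio.charset.StandardCharsets;
+   import org.apache.hadoop.util.bloom.DynamicBloomFilter;
+   import org.apache.hadoop.util.bloom.Key;
+   import org.apache.hadoop.util.hash.Hash;
+
+   // 128-bit rows, 3 hash functions, at most 100 keys recorded per row before
+   // a new row (a fresh standard Bloom filter) is appended to the matrix.
+   DynamicBloomFilter dbf = new DynamicBloomFilter(128, 3, Hash.MURMUR_HASH, 100);
+   for (int i = 0; i < 1000; i++) {
+     dbf.add(new Key(("key-" + i).getBytes(StandardCharsets.UTF_8)));
+   }
+   boolean present =
+       dbf.membershipTest(new Key("key-42".getBytes(StandardCharsets.UTF_8)));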
+
+
+
+
+
+
+
+
+ Builds a hash function that must obey a given maximum number of returned values and a highest value.
+ @param maxValue The highest value that can be returned.
+ @param nbHash The number of resulting hashed values.
+ @param hashType type of the hashing function (see {@link Hash}).]]>
+
+
+
+
+ this hash function. A NOOP]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The idea is to randomly select a bit to reset.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will generate the minimum
+ number of false negatives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will remove the maximum number
+ of false positives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will, at the same time, remove
+ the maximum number of false positives while minimizing the number of false
+ negatives generated.]]>
+
+
+
+
+ Originally created by
+ European Commission One-Lab Project 034819.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this retouched Bloom filter.
+
+ Invariant: if the false positive is null, nothing happens.
+ @param key The false positive key to add.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param coll The collection of false positives.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The list of false positives.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The array of false positives.]]>
+
+
+
+
+
+
+ this retouched Bloom filter.
+ @param scheme The selective clearing scheme to apply.]]>
+
+
+
+
+
+
+
+
+
+
+
+ retouched Bloom filter, as defined in the CoNEXT 2006 paper.
+
+ It allows the removal of selected false positives at the cost of introducing
+ random false negatives, and with the benefit of eliminating some random false
+ positives at the same time.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Any exception generated in the future is
+ extracted and rethrown.
+
+ @param future future to evaluate
+ @param type of the result.
+ @return the result, if all went well.
+ @throws InterruptedIOException future was interrupted
+ @throws IOException if something went wrong
+ @throws RuntimeException any nested RTE thrown]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ Any exception generated in the future is
+ extracted and rethrown.
+
+ @param future future to evaluate
+ @param type of the result.
+ @return the result, if all went well.
+ @throws InterruptedIOException future was interrupted
+ @throws IOException if something went wrong
+ @throws RuntimeException any nested RTE thrown
+ @throws TimeoutException the future timed out.]]>
+
+
+
+
+
+
+ type of return value.
+ @return nothing, ever.
+ @throws IOException either the inner IOException, or a wrapper around
+ any non-Runtime-Exception
+ @throws RuntimeException if that is the inner cause.]]>
+
+
+
+
+
+
+ type of return value.
+ @return nothing, ever.
+ @throws IOException either the inner IOException, or a wrapper around
+ any non-Runtime-Exception
+ @throws RuntimeException if that is the inner cause.]]>
+
+
+
+
+
+
+
+ If it is an IOE: Return.
+
+ If it is a {@link UncheckedIOException}: return the cause
+
+ Completion/Execution Exceptions: extract and repeat
+
+ If it is an RTE or Error: throw.
+
+ Any other type: wrap in an IOE
+
+
+ Recursively handles wrapped Execution and Completion Exceptions in
+ case something very complicated has happened.
+ @param e exception.
+ @return an IOException extracted or built from the cause.
+ @throws RuntimeException if that is the inner cause.
+ @throws Error if that is the inner cause.]]>
+
+
+
+
+ Contains methods promoted from
+ {@link org.apache.hadoop.fs.impl.FutureIOSupport} because they
+ are a key part of integrating async IO in application code.
+
+
+ One key feature is that the {@link #awaitFuture(Future)} and
+ {@link #awaitFuture(Future, long, TimeUnit)} calls will
+ extract and rethrow exceptions raised in the future's execution,
+ including extracting the inner IOException of any
+ {@code UncheckedIOException} raised in the future.
+ This makes it somewhat easier to execute IOException-raising
+ code inside futures.
+
+ ]]>
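+
+ A minimal sketch of awaitFuture() combined with the FileSystem openFile()
+ builder; fs and the path are assumed to exist in the calling code:
+
+   import java.util.concurrent.CompletableFuture;
+   import java.util.concurrent.TimeUnit;
+   import org.apache.hadoop.fs.FSDataInputStream;
+   import org.apache.hadoop.fs.Path;
+   import org.apache.hadoop.util.functional.FutureIO;
+
+   CompletableFuture<FSDataInputStream> future =
+       fs.openFile(new Path("/data/part-00000")).build();
+
+   // Blocks for the result; an IOException raised inside the future is
+   // unwrapped and rethrown instead of surfacing as an ExecutionException.
+   try (FSDataInputStream in = FutureIO.awaitFuture(future, 30, TimeUnit.SECONDS)) {
+     // read from the stream
+   }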
+
+
+
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+
+ source type
+ @param result type
+ @param iterator source
+ @param mapper transformation
+ @return a remote iterator]]>
+
+
+
+
+
+ source type
+ @param result type
+ @param iterator source
+ @return a remote iterator]]>
+
+
+
+
+
+
+
+ Elements are filtered in the hasNext() method; if not used
+ the filtering will be done on demand in the {@code next()}
+ call.
+ @param type
+ @param iterator source
+ @param filter filter
+ @return a remote iterator]]>
+
+
+
+
+
+
+ source type.
+ @return a new iterator]]>
+
+
+
+
+
+
+ type
+ @return a list of the values.
+ @throws IOException if the source RemoteIterator raises it.]]>
+
+
+
+
+
+
+
+ type
+ @return an array of the values.
+ @throws IOException if the source RemoteIterator raises it.]]>
+
+
+
+
+
+
+
+
+ If the iterator is an IOStatisticsSource returning a non-null
+ set of statistics, and this classes log is set to DEBUG,
+ then the statistics of the operation are evaluated and logged at
+ debug.
+
+ The number of entries processed is returned, as it is useful to
+ know this, especially during tests or when reporting values
+ to users.
+
+ This does not close the iterator afterwards.
+ @param source iterator source
+ @param consumer consumer of the values.
+ @return the number of elements processed
+ @param type of source
+ @throws IOException if the source RemoteIterator or the consumer raise one.]]>
+
+
+
+
+
+ type of source]]>
+
+
+
+
+ This aims to make it straightforward to use lambda-expressions to
+ transform the results of an iterator, without losing the statistics
+ in the process, and to chain the operations together.
+
+ The closeable operation will be passed through RemoteIterators which
+ wrap other RemoteIterators. This is to support any iterator which
+ can be closed to release held connections, file handles etc.
+ Unless client code is written to assume that RemoteIterator instances
+ may be closed, this is not likely to be broadly used. It is added
+ to make it possible to adopt this feature in a managed way.
+
+ One notable feature is that the
+ {@link #foreach(RemoteIterator, ConsumerRaisingIOE)} method will
+ LOG at debug any IOStatistics provided by the iterator, if such
+ statistics are provided. There's no attempt at retrieval and logging
+ if the LOG is not set to debug, so it is a zero cost feature unless
+ the logger {@code org.apache.hadoop.fs.functional.RemoteIterators}
+ is at DEBUG.
+
+ Based on the S3A Listing code, and some work on moving other code
+ to using iterative listings so as to pick up the statistics.]]>
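+
+ A minimal sketch of chaining a mapping and a foreach over a directory listing;
+ fs and the directory path are assumed to be available in the calling code:
+
+   import org.apache.hadoop.fs.FileStatus;
+   import org.apache.hadoop.fs.Path;
+   import org.apache.hadoop.fs.RemoteIterator;
+   import org.apache.hadoop.util.functional.RemoteIterators;
+
+   // Transform the listing lazily, then consume it; foreach() returns the
+   // number of entries processed and logs any IOStatistics at DEBUG.
+   RemoteIterator<Path> paths = RemoteIterators.mappingRemoteIterator(
+       fs.listStatusIterator(new Path("/data")),
+       FileStatus::getPath);
+   long count = RemoteIterators.foreach(paths, p -> System.out.println(p));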
+
+
+
+
+
+
+
+
diff --git a/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.4.xml b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.4.xml
new file mode 100644
index 0000000000000..62a0e09f121af
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/dev-support/jdiff/Apache_Hadoop_Common_3.3.4.xml
@@ -0,0 +1,39037 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key to be deprecated
+ @param newKeys list of keys that take up the values of deprecated key
+ @param customMessage deprecation message
+ @deprecated use {@link #addDeprecation(String key, String newKey,
+ String customMessage)} instead]]>
+
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key to be deprecated
+ @param newKey key that takes up the values of deprecated key
+ @param customMessage deprecation message]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If a key is deprecated in favor of multiple keys, they are all treated as
+ aliases of each other, and setting any one of them resets all the others
+ to the new value.
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKeys list of keys that take up the values of deprecated key
+ @deprecated use {@link #addDeprecation(String key, String newKey)} instead]]>
+
+
+
+
+
+
+ UnsupportedOperationException
+
+ If you have multiple deprecation entries to add, it is more efficient to
+ use #addDeprecations(DeprecationDelta[] deltas) instead.
+
+ @param key Key that is to be deprecated
+ @param newKey key that takes up the value of deprecated key]]>
+
+
+
+
+
+ key is deprecated.
+
+ @param key the parameter which is to be checked for deprecation
+ @return true if the key is deprecated and
+ false otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param name resource to be added, the classpath is examined for a file
+ with that name.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param url url of the resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param file file-path of resource to be added, the local filesystem is
+ examined directly to find the resource, without referring to
+ the classpath.]]>
+
+
+
+
+
+
+
+
+
+ final.
+
+ WARNING: The contents of the InputStream will be cached, by this method.
+ So use this sparingly because it does increase the memory consumption.
+
+ @param in InputStream to deserialize the object from. In will be read from
+ when a get or set is called next. After it is read the stream will be
+ closed.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param in InputStream to deserialize the object from.
+ @param name the name of the resource because InputStream.toString is not
+ very descriptive sometimes.]]>
+
+
+
+
+
+
+
+
+
+
+ final.
+
+ @param conf Configuration object from which to load properties]]>
+
+
+
+
+
+
+
+
+
+
+ name property, null if
+ no such property exists. If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null.
+
+ Values are processed for variable expansion
+ before being returned.
+
+ As a side effect get loads the properties from the sources if called for
+ the first time as a lazy init.
+
+ @param name the property name, will be trimmed before get value.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property, but only for
+ names which have no valid value, usually non-existent or commented
+ out in XML.
+
+ @param name the property name
+ @return true if the property name exists without value]]>
+
+
+
+
+
+ name property as a trimmed String,
+ null if no such property exists.
+ If the key is deprecated, it returns the value of
+ the first key which replaces the deprecated key and is not null
+
+ Values are processed for variable expansion
+ before being returned.
+
+ @param name the property name.
+ @return the value of the name or its replacing property,
+ or null if no such property exists.]]>
+
+
+
+
+
+
+ name property as a trimmed String,
+ defaultValue if no such property exists.
+ See {@link Configuration#getTrimmed} for more details.
+
+ @param name the property name.
+ @param defaultValue the property default value.
+ @return the value of the name or defaultValue
+ if it is not set.]]>
+
+
+
+
+
+ name property, without doing
+ variable expansion.If the key is
+ deprecated, it returns the value of the first key which replaces
+ the deprecated key and is not null.
+
+ @param name the property name.
+ @return the value of the name property or
+ its replacing property and null if no such property exists.]]>
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated or there is a deprecated name associated to it,
+ it sets the value to both names. Name will be trimmed before put into
+ configuration.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+
+ value of the name property. If
+ name is deprecated, it also sets the value to
+ the keys that replace the deprecated key. Name will be trimmed before put
+ into configuration.
+
+ @param name property name.
+ @param value property value.
+ @param source the place that this configuration value came from
+ (For debugging).
+ @throws IllegalArgumentException when the value or name is null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name. If the key is deprecated,
+ it returns the value of the first key which replaces the deprecated key
+ and is not null.
+ If no such property exists,
+ then defaultValue is returned.
+
+ @param name property name, will be trimmed before get value.
+ @param defaultValue default value.
+ @return property value, or defaultValue if the property
+ doesn't exist.]]>
+
+
+
+
+
+
+ name property as an int.
+
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid int,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as an int,
+ or defaultValue.]]>
+
+
+
+
+
+ name property as a set of comma-delimited
+ int values.
+
+ If no such property exists, an empty array is returned.
+
+ @param name property name
+ @return property value interpreted as an array of comma-delimited
+ int values]]>
+
+
+
+
+
+
+ name property to an int.
+
+ @param name property name.
+ @param value int value of the property.]]>
+
+
+
+
+
+
+ name property as a long.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid long,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a long or
+ human readable format. If no such property exists, the provided default
+ value is returned, or if the specified value is not a valid
+ long or human readable format, then an error is thrown. You
+ can use the following suffix (case insensitive): k(kilo), m(mega), g(giga),
+ t(tera), p(peta), e(exa)
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a long,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a long.
+
+ @param name property name.
+ @param value long value of the property.]]>
+
+
+
+
+
+
+ name property as a float.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid float,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a float,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a float.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a double.
+ If no such property exists, the provided default value is returned,
+ or if the specified value is not a valid double,
+ then an error is thrown.
+
+ @param name property name.
+ @param defaultValue default value.
+ @throws NumberFormatException when the value is invalid
+ @return property value as a double,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a double.
+
+ @param name property name.
+ @param value property value.]]>
+
+
+
+
+
+
+ name property as a boolean.
+ If no such property is specified, or if the specified value is not a valid
+ boolean, then defaultValue is returned.
+
+ @param name property name.
+ @param defaultValue default value.
+ @return property value as a boolean,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property to a boolean.
+
+ @param name property name.
+ @param value boolean value of the property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property to the given type. This
+ is equivalent to set(<name>, value.toString()).
+ @param name property name
+ @param value new value
+ @param enumeration type]]>
+
+
+
+
+
+
+ enumeration type
+ @throws IllegalArgumentException If mapping is illegal for the type
+ provided
+ @return enumeration type]]>
+
+
+
+
+
+
+
+ name to the given time duration. This
+ is equivalent to set(<name>, value + <time suffix>).
+ @param name Property name
+ @param value Time duration
+ @param unit Unit of time]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as a Pattern.
+ If no such property is specified, or if the specified value is not a valid
+ Pattern, then defaultValue is returned.
+ Note that the returned value is NOT trimmed by this method.
+
+ @param name property name
+ @param defaultValue default value
+ @return property value as a compiled Pattern, or defaultValue]]>
+
+
+
+
+
+
+ Pattern.
+ If the pattern is passed as null, sets the empty pattern which results in
+ further calls to getPattern(...) returning the default value.
+
+ @param name property name
+ @param pattern new value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property as
+ a collection of Strings.
+ If no such property is specified then empty collection is returned.
+
+ This is an optimized version of {@link #getStrings(String)}
+
+ @param name property name.
+ @return property value as a collection of Strings.]]>
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then null is returned.
+
+ @param name property name.
+ @return property value as an array of Strings,
+ or null.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of Strings,
+ or default value.]]>
+
+
+
+
+
+ name property as
+ a collection of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then empty Collection is returned.
+
+ @param name property name.
+ @return property value as a collection of Strings, or empty Collection]]>
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then an empty array is returned.
+
+ @param name property name.
+ @return property value as an array of trimmed Strings,
+ or empty array.]]>
+
+
+
+
+
+
+ name property as
+ an array of Strings, trimmed of the leading and trailing whitespace.
+ If no such property is specified then default value is returned.
+
+ @param name property name.
+ @param defaultValue The default value
+ @return property value as an array of trimmed Strings,
+ or default value.]]>
+
+
+
+
+
+
+ name property as
+ comma delimited values.
+
+ @param name property name.
+ @param values The values]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostProperty as a
+ InetSocketAddress. If hostProperty is
+ null, addressProperty will be used. This
+ is useful for cases where we want to differentiate between host
+ bind address and address clients should use to establish connection.
+
+ @param hostProperty bind host property name.
+ @param addressProperty address property name.
+ @param defaultAddressValue the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+
+ name property as a
+ InetSocketAddress.
+ @param name property name.
+ @param defaultAddress the default value
+ @param defaultPort the default port
+ @return InetSocketAddress]]>
+
+
+
+
+
+
+ name property as
+ a host:port.]]>
+
+
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address. If the host and address
+ properties are configured the host component of the address will be combined
+ with the port component of the addr to generate the address. This is to allow
+ optional control over which host name is used in multi-home bind-host
+ cases where a host can have multiple names
+ @param hostProperty the bind-host configuration name
+ @param addressProperty the service address configuration name
+ @param defaultAddressValue the service default address configuration value
+ @param addr InetSocketAddress of the service listener
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+ name property as a host:port. The wildcard
+ address is replaced with the local host's address.
+ @param name property name.
+ @param addr InetSocketAddress of a listener to store in the given property
+ @return InetSocketAddress for clients to connect]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ name property
+ as an array of Class.
+ The value of the property specifies a list of comma separated class names.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the property name.
+ @param defaultValue default value.
+ @return property value as a Class[],
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a Class.
+ If no such property is specified, then defaultValue is
+ returned.
+
+ @param name the conf key name.
+ @param defaultValue default value.
+ @return property value as a Class,
+ or defaultValue.]]>
+
+
+
+
+
+
+
+ name property as a Class
+ implementing the interface specified by xface.
+
+ If no such property is specified, then defaultValue is
+ returned.
+
+ An exception is thrown if the returned class does not implement the named
+ interface.
+
+ @param name the conf key name.
+ @param defaultValue default value.
+ @param xface the interface implemented by the named class.
+ @return property value as a Class,
+ or defaultValue.]]>
+
+
+
+
+
+
+ name property as a List
+ of objects implementing the interface specified by xface.
+
+ An exception is thrown if any of the classes does not exist, or if it does
+ not implement the named interface.
+
+ @param name the property name.
+ @param xface the interface implemented by the classes named by
+ name.
+ @return a List of objects implementing xface.]]>
+
+
+
+
+
+
+
+ name property to the name of a
+ theClass implementing the given interface xface.
+
+ An exception is thrown if theClass does not implement the
+ interface xface.
+
+ @param name property name.
+ @param theClass property value.
+ @param xface the interface implemented by the named class.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.]]>
+
+
+
+
+
+
+
+ dirsProp with
+ the given path. If dirsProp contains multiple directories,
+ then one is chosen based on path's hash code. If the selected
+ directory does not exist, an attempt is made to create it.
+
+ @param dirsProp directory in which to locate the file.
+ @param path file-path.
+ @return local file under the directory with the given path.]]>
+
+
+
+
+
+
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return an input stream attached to the resource.]]>
+
+
+
+
+
+ name.
+
+ @param name configuration resource name.
+ @return a reader attached to the resource.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ String
+ key-value pairs in the configuration.
+
+ @return an iterator over the entries.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When property name is not empty and the property exists in the
+ configuration, this method writes the property and its attributes
+ to the {@link Writer}.
+
+
+
+ When property name is null or empty, this method writes all the
+ configuration properties and their attributes to the {@link Writer}.
+
+
+
+ When property name is not empty but the property doesn't exist in
+ the configuration, this method throws an {@link IllegalArgumentException}.
+
+
+ @param out the writer to write to.]]>
+
+
+
+
+
+
+
+
+
+
+ When propertyName is not empty, and the property exists
+ in the configuration, the format of the output would be,
+
+ When propertyName is not empty, and the property is not
+ found in the configuration, this method will throw an
+ {@link IllegalArgumentException}.
+
+
+
+ @param config the configuration
+ @param propertyName property name
+ @param out the Writer to write to
+ @throws IOException
+ @throws IllegalArgumentException when property name is not
+ empty and the property is not found in configuration]]>
+
+
+
+
+
+
+
+
+ { "properties" :
+ [ { key : "key1",
+ value : "value1",
+ isFinal : "key1.isFinal",
+ resource : "key1.resource" },
+ { key : "key2",
+ value : "value2",
+ isFinal : "ke2.isFinal",
+ resource : "key2.resource" }
+ ]
+ }
+
+
+ It does not output the properties of the configuration object which
+ is loaded from an input stream.
+
+
+ @param config the configuration
+ @param out the Writer to write to
+ @throws IOException]]>
+
Configurations are specified by resources. A resource contains a set of
+ name/value pairs as XML data. Each resource is named by either a
+ String or by a {@link Path}. If named by a String,
+ then the classpath is examined for a file with that name. If named by a
+ Path, then the local filesystem is examined directly, without
+ referring to the classpath.
+
+
+ Unless explicitly turned off, Hadoop by default specifies two
+ resources, loaded in-order from the classpath:
+ core-default.xml: Read-only defaults for hadoop.
+ core-site.xml: Site-specific configuration for a given hadoop
+ installation.
+
+ Applications may add additional resources, which are loaded
+ subsequent to these resources in the order they are added.
+
+
Final Parameters
+
+
Configuration parameters may be declared final.
+ Once a resource declares a value final, no subsequently-loaded
+ resource can alter that value.
+ For example, one might define a final parameter with:
+
When conf.get("tempdir") is called, then ${basedir}
+ will be resolved to another property in this Configuration, while
+ ${user.name} would then ordinarily be resolved to the value
+ of the System property with that name.
+
When conf.get("otherdir") is called, then ${env.BASE_DIR}
+ will be resolved to the value of the ${BASE_DIR} environment variable.
+ It supports ${env.NAME:-default} and ${env.NAME-default} notations.
+ The former is resolved to "default" if ${NAME} environment variable is undefined
+ or its value is empty.
+ The latter behaves the same way only if ${NAME} is undefined.
+
By default, warnings will be given to any deprecated configuration
+ parameters and these are suppressible by configuring
+ log4j.logger.org.apache.hadoop.conf.Configuration.deprecation in
+ log4j.properties file.
+
+
Tags
+
+
Optionally we can tag related properties together by using tag
+ attributes. System tags are defined by hadoop.tags.system property. Users
+ can define their own custom tags in hadoop.tags.custom property.
+
+
Properties marked with tags can be retrieved with conf
+ .getAllPropertiesByTag("HDFS") or conf.getAllPropertiesByTags
+ (Arrays.asList("YARN","SECURITY")).
+ ]]>
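+
+ A minimal sketch of resource loading, variable expansion and the typed
+ accessors; the resource and property names are illustrative only:
+
+   import java.util.concurrent.TimeUnit;
+   import org.apache.hadoop.conf.Configuration;
+
+   Configuration conf = new Configuration();   // core-default.xml, core-site.xml
+   conf.addResource("my-site.xml");             // looked up on the classpath
+
+   // Variable expansion: ${basedir} is resolved when the value is read.
+   conf.set("basedir", "/data");
+   conf.set("tempdir", "${basedir}/tmp");
+   String tempdir = conf.get("tempdir");        // "/data/tmp"
+
+   // Typed accessors with defaults.
+   int retries = conf.getInt("my.retries", 3);
+   long timeoutSecs = conf.getTimeDuration("my.timeout", 30, TimeUnit.SECONDS);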
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #createKey(String, byte[], Options)} method.
+
+ @param name the base name of the key
+ @param options the options for the new key.
+ @return the version name of the first version of the key.
+ @throws IOException
+ @throws NoSuchAlgorithmException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This implementation generates the key material and calls the
+ {@link #rollNewVersion(String, byte[])} method.
+
+ @param name the basename of the key
+ @return the name of the new version of the key
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KeyProvider implementations must be thread safe.]]>
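+
+ A minimal sketch of creating a key through a discovered provider, assuming
+ KeyProviderFactory and a provider configured via
+ hadoop.security.key.provider.path; the key name is illustrative:
+
+   import java.util.List;
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.crypto.key.KeyProvider;
+   import org.apache.hadoop.crypto.key.KeyProviderFactory;
+
+   Configuration conf = new Configuration();
+   List<KeyProvider> providers = KeyProviderFactory.getProviders(conf);
+   KeyProvider provider = providers.get(0);
+
+   // Generates the key material and creates the first version of the key.
+   KeyProvider.KeyVersion kv =
+       provider.createKey("my-key", new KeyProvider.Options(conf));
+   provider.flush();   // persist any cached changes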
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ NULL if
+ a provider for the specified URI scheme could not be found.
+ @throws IOException thrown if the provider failed to initialize.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri has syntax error]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri is
+ not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ uri
+ determines a configuration property name,
+ fs.AbstractFileSystem.scheme.impl whose value names the
+ AbstractFileSystem class.
+
+ The entire URI and conf is passed to the AbstractFileSystem factory method.
+
+ @param uri for the file system to be created.
+ @param conf which is passed to the file system impl.
+
+ @return file system for the given URI.
+
+ @throws UnsupportedFileSystemException if the file system for
+ uri is not supported.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations, such as HDFS, metadata
+ synchronization is essential to guarantee consistency of read requests
+ particularly in HA setting.
+ @throws IOException
+ @throws UnsupportedOperationException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ } describing entries to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing modifications, must
+ include entries for user, group, and others for compatibility with
+ permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ } which returns each AclStatus
+ @throws IOException if an ACL could not be read]]>
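+
+ A minimal sketch of the ACL calls above, run against a FileSystem instance fs;
+ the path, user name and permissions are illustrative:
+
+   import java.util.Collections;
+   import org.apache.hadoop.fs.Path;
+   import org.apache.hadoop.fs.permission.AclEntry;
+   import org.apache.hadoop.fs.permission.AclEntryScope;
+   import org.apache.hadoop.fs.permission.AclEntryType;
+   import org.apache.hadoop.fs.permission.AclStatus;
+   import org.apache.hadoop.fs.permission.FsAction;
+
+   Path path = new Path("/data/reports");
+   AclEntry entry = new AclEntry.Builder()
+       .setScope(AclEntryScope.ACCESS)
+       .setType(AclEntryType.USER)
+       .setName("alice")
+       .setPermission(FsAction.READ_EXECUTE)
+       .build();
+
+   fs.modifyAclEntries(path, Collections.singletonList(entry)); // merge into ACL
+   AclStatus status = fs.getAclStatus(path);                    // read back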
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return {@literal Map} describing the XAttrs of the file
+ or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException]]>
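+
+ A minimal sketch of the extended attribute calls above against a FileSystem
+ instance fs; the path, attribute name and value are illustrative:
+
+   import java.nio.charset.StandardCharsets;
+   import java.util.List;
+   import org.apache.hadoop.fs.Path;
+
+   Path path = new Path("/data/file.csv");
+
+   // Attribute names carry a namespace prefix such as "user.".
+   fs.setXAttr(path, "user.origin",
+       "ingest-job-17".getBytes(StandardCharsets.UTF_8));
+
+   byte[] origin = fs.getXAttr(path, "user.origin");
+   List<String> names = fs.listXAttrs(path);
+   fs.removeXAttr(path, "user.origin");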
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ BlockLocation(offset: 0, length: BLOCK_SIZE,
+ hosts: {"host1:9866", "host2:9866, host3:9866"})
+
+
+ And if the file is erasure-coded, each BlockLocation represents a logical
+ block group. Value offset is the offset of a block group in the file and
+ value length is the total length of a block group. Hosts of a BlockLocation
+ are the datanodes that hold all the data blocks and parity blocks of a
+ block group.
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ A BlockLocation example will be like:
+
+
+ Please refer to
+ {@link FileSystem#getFileBlockLocations(FileStatus, long, long)} or
+ {@link FileContext#getFileBlockLocations(Path, long, long)}
+ for more examples.]]>
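+
+ A minimal sketch of retrieving and printing block locations for a file,
+ assuming a FileSystem instance fs and an illustrative path:
+
+   import org.apache.hadoop.fs.BlockLocation;
+   import org.apache.hadoop.fs.FileStatus;
+   import org.apache.hadoop.fs.Path;
+
+   FileStatus status = fs.getFileStatus(new Path("/data/large.bin"));
+   BlockLocation[] blocks =
+       fs.getFileBlockLocations(status, 0, status.getLen());
+
+   for (BlockLocation block : blocks) {
+     System.out.printf("offset=%d length=%d hosts=%s%n",
+         block.getOffset(), block.getLength(),
+         String.join(",", block.getHosts()));
+   }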
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ After a successful call, {@code buf.position()} will be advanced by the
+ number of bytes read and {@code buf.limit()} will be unchanged.
+
+ In the case of an exception, the state of the buffer (the contents of the
+ buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is
+ undefined, and callers should be prepared to recover from this
+ eventuality.
+
+ Callers should use {@link StreamCapabilities#hasCapability(String)} with
+ {@link StreamCapabilities#PREADBYTEBUFFER} to check if the underlying
+ stream supports this interface, otherwise they might get a
+ {@link UnsupportedOperationException}.
+
+ Implementations should treat 0-length requests as legitimate, and must not
+ signal an error upon their receipt.
+
+ This does not change the current offset of a file, and is thread-safe.
+
+ @param position position within file
+ @param buf the ByteBuffer to receive the results of the read operation.
+ @return the number of bytes read, possibly zero, or -1 if reached
+ end-of-stream
+ @throws IOException if there is some error performing the read]]>
+
+
+
+
+
+
+
+
+ This operation provides similar semantics to
+ {@link #read(long, ByteBuffer)}, the difference is that this method is
+ guaranteed to read data until the {@link ByteBuffer} is full, or until
+ the end of the data stream is reached.
+
+ @param position position within file
+ @param buf the ByteBuffer to receive the results of the read operation.
+ @throws IOException if there is some error performing the read
+ @throws EOFException the end of the data was reached before
+ the read operation completed
+ @see #read(long, ByteBuffer)]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ After a successful call, {@code buf.position()} will be advanced by the
+ number of bytes read and {@code buf.limit()} will be unchanged.
+
+ In the case of an exception, the state of the buffer (the contents of the
+ buffer, the {@code buf.position()}, the {@code buf.limit()}, etc.) is
+ undefined, and callers should be prepared to recover from this
+ eventuality.
+
+ Callers should use {@link StreamCapabilities#hasCapability(String)} with
+ {@link StreamCapabilities#READBYTEBUFFER} to check if the underlying
+ stream supports this interface, otherwise they might get a
+ {@link UnsupportedOperationException}.
+
+ Implementations should treat 0-length requests as legitimate, and must not
+ signal an error upon their receipt.
+
+ @param buf
+ the ByteBuffer to receive the results of the read operation.
+ @return the number of bytes read, possibly zero, or -1 if
+ reach end-of-stream
+ @throws IOException
+ if there is some error performing the read]]>
+
CREATE - to create a file if it does not exist,
+ else throw FileAlreadyExists.
+
APPEND - to append to a file if it exists,
+ else throw FileNotFoundException.
+
OVERWRITE - to truncate a file if it exists,
+ else throw FileNotFoundException.
+
CREATE|APPEND - to create a file if it does not exist,
+ else append to an existing file.
+
CREATE|OVERWRITE - to create a file if it does not exist,
+ else overwrite an existing file.
+
SYNC_BLOCK - to force closed blocks to the disk device.
+ In addition {@link Syncable#hsync()} should be called after each write,
+ if true synchronous behavior is required.
+
LAZY_PERSIST - Create the block on transient storage (RAM) if
+ available.
+
APPEND_NEWBLOCK - Append data to a new block instead of end of the last
+ partial block.
+
+
+ Following combinations are not valid and will result in
+ {@link HadoopIllegalArgumentException}:
+
+
+ ]]>
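+
+ A minimal sketch of passing CreateFlag combinations to FileContext#create;
+ the path is illustrative and error handling is omitted:
+
+   import java.util.EnumSet;
+   import org.apache.hadoop.fs.CreateFlag;
+   import org.apache.hadoop.fs.FSDataOutputStream;
+   import org.apache.hadoop.fs.FileContext;
+   import org.apache.hadoop.fs.Options.CreateOpts;
+   import org.apache.hadoop.fs.Path;
+
+   FileContext fc = FileContext.getFileContext();
+   // CREATE|OVERWRITE: create the file if absent, otherwise truncate it.
+   try (FSDataOutputStream out = fc.create(
+       new Path("/tmp/demo.txt"),
+       EnumSet.of(CreateFlag.CREATE, CreateFlag.OVERWRITE),
+       CreateOpts.createParent())) {
+     out.writeUTF("hello");
+   }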
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws AccessControlException if access denied
+ @throws IOException If an IO Error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Progress - to report progress on the operation - default null
+
Permission - umask is applied against permission: default is
+ FsPermissions:getDefault()
+
+
CreateParent - create missing parent path; default is to not
+ to create parents
+
The defaults for the following are SS defaults of the file
+ server implementing the target path. Not all parameters make sense
+ for all kinds of file system - e.g. localFS ignores Blocksize,
+ replication, checksum
+
+
BufferSize - buffersize used in FSDataOutputStream
+
Blocksize - block size for file blocks
+
ReplicationFactor - replication for blocks
+
ChecksumParam - Checksum parameters. server default is used
+ if not specified.
+
+
+
+ @return {@link FSDataOutputStream} for created file
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file f already exists
+ @throws FileNotFoundException If parent of f does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of f is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is not valid]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dir already
+ exists
+ @throws FileNotFoundException If parent of dir does not exist
+ and createParent is false
+ @throws ParentNotDirectoryException If parent of dir is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for dir
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path dir is not valid]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws InvalidPathException If path f is invalid]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+
+ @throws AccessControlException If access is denied
+ @throws FileNotFoundException If file f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Fails if src is a file and dst is a directory.
+
Fails if src is a directory and dst is a file.
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails if the dst
+ already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites the dst if
+ it is a file or an empty directory. Rename fails if dst is a non-empty
+ directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for details
+
+
+ @param src path to be renamed
+ @param dst new path after rename
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If dst already exists and
+ options has {@link Options.Rename#OVERWRITE}
+ option false.
+ @throws FileNotFoundException If src does not exist
+ @throws ParentNotDirectoryException If parent of dst is not a
+ directory
+ @throws UnsupportedFileSystemException If file system for src
+ and dst is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f
+ is not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server
+
+ RuntimeExceptions:
+ @throws HadoopIllegalArgumentException If username or
+ groupname is invalid.]]>
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If the given path does not refer to a symlink
+ or an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Given a path referring to a symlink of form:
+
+ {@literal <---}X{@literal --->}
+ fs://host/A/B/link
+ {@literal <-----}Y{@literal ----->}
+
+ In this path X is the scheme and authority that identify the file system,
+ and Y is the path leading up to the final path component "link". If Y is
+ a symlink itself then let Y' be the target of Y and X' be the scheme and
+ authority of Y'. Symlink targets may be:
+
+ 1. Fully qualified URIs
+
+ fs://hostX/A/B/file Resolved according to the target file system.
+
+ 2. Partially qualified URIs (eg scheme but no host)
+
+ fs:///A/B/file Resolved according to the target file system. Eg resolving
+ a symlink to hdfs:///A results in an exception because
+ HDFS URIs must be fully qualified, while a symlink to
+ file:///A will not since Hadoop's local file systems
+ require partially qualified URIs.
+
+ 3. Relative paths
+
+ path Resolves to [Y'][path]. Eg if Y resolves to hdfs://host/A and path
+ is "../B/file" then [Y'][path] is hdfs://host/B/file
+
+ 4. Absolute paths
+
+ path Resolves to [X'][path]. Eg if Y resolves hdfs://host/A/B and path
+ is "/file" then [X][path] is hdfs://host/file
+
+
+ @param target the target of the symbolic link
+ @param link the path to be created that points to target
+ @param createParent if true then missing parent dirs are created if
+ false then parent must exist
+
+
+ @throws AccessControlException If access is denied
+ @throws FileAlreadyExistsException If file link already exists
+ @throws FileNotFoundException If target does not exist
+ @throws ParentNotDirectoryException If parent of link is not a
+ directory.
+ @throws UnsupportedFileSystemException If file system for
+ target or link is not supported
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws UnsupportedFileSystemException If file system for f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ f is
+ not supported
+ @throws IOException If an I/O error occurred
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing
+ modifications
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+ } describing entries
+ to remove
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } describing
+ modifications, must include entries for user, group, and others for
+ compatibility with permission bits.
+ @throws IOException if an ACL could not be modified]]>
+
+
+
+
+
+
+ } which returns
+ each AclStatus
+ @throws IOException if an ACL could not be read]]>
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs
+ of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs
+ of the file or directory
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List{@literal <}String{@literal >} of the XAttr names of the
+ file or directory
+ @throws IOException]]>
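+
+ A minimal sketch of an extended-attribute round trip through FileContext;
+ the path and attribute name are hypothetical, and the target file system
+ must support xattrs (user attributes use the "user." namespace):
+
+   FileContext fc = FileContext.getFileContext();
+   Path p = new Path("/user/alice/file.txt");
+   fc.setXAttr(p, "user.origin", "ingest-job-7".getBytes(StandardCharsets.UTF_8));
+   byte[] value = fc.getXAttr(p, "user.origin");
+   List<String> names = fc.listXAttrs(p);
+   fc.removeXAttr(p, "user.origin");
+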
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+ Exceptions applicable to file systems accessed over RPC:
+ @throws RpcClientException If an exception occurred in the RPC client
+ @throws RpcServerException If an exception occurred in the RPC server
+ @throws UnexpectedServerException If server implementation throws
+ undeclared exception to RPC server]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Path Names
+
+ The Hadoop file system supports a URI namespace and URI names. This enables
+ multiple types of file systems to be referenced using fully-qualified URIs.
+ Two common Hadoop file system implementations are
+
+
the local file system: file:///path
+
the HDFS file system: hdfs://nnAddress:nnPort/path
+
+
+ The Hadoop file system also supports additional naming schemes besides URIs.
+ Hadoop has the concept of a default file system, which implies a
+ default URI scheme and authority. This enables slash-relative names
+ relative to the default FS, which are more convenient for users and
+ application writers. The default FS is typically set by the user's
+ environment, though it can also be manually specified.
+
+
+ Hadoop also supports working-directory-relative names, which are paths
+ relative to the current working directory (similar to Unix). The working
+ directory can be in a different file system than the default FS.
+
+ Thus, Hadoop path names can be specified as one of the following:
+
+
a fully-qualified URI: scheme://authority/path (e.g.
+ hdfs://nnAddress:nnPort/foo/bar)
+
a slash-relative name: path relative to the default file system (e.g.
+ /foo/bar)
+
a working-directory-relative name: path relative to the working dir (e.g.
+ foo/bar)
+
+ Relative paths with scheme (scheme:foo/bar) are illegal.
+
+
Role of FileContext and Configuration Defaults
+
+ The FileContext is the analogue of per-process file-related state in Unix. It
+ contains two properties:
+
+
+
the default file system (for resolving slash-relative names)
+
the umask (for file permissions)
+
+ In general, these properties are obtained from the default configuration file
+ in the user's environment (see {@link Configuration}).
+
+ Further file system properties are specified on the server-side. File system
+ operations default to using these server-side defaults unless otherwise
+ specified.
+
+ The file system related server-side defaults are:
+
+
the home directory (default is "/user/userName")
+
the initial wd (only for local fs)
+
replication factor
+
block size
+
buffer size
+
encryptDataTransfer
+
checksum option. (checksumType and bytesPerChecksum)
+
+
+
Example Usage
+
+ Example 1: use the default config read from the $HADOOP_CONFIG/core.xml.
+ Unspecified values come from core-defaults.xml in the release jar.
+
+
myFContext = FileContext.getFileContext(); // uses the default config
+ // which has your default FS
+
myFContext.create(path, ...);
+
myFContext.setWorkingDir(path);
+
myFContext.open (path, ...);
+
...
+
+ Example 2: Get a FileContext with a specific URI as the default FS
+
+
myFContext = FileContext.getFileContext(URI);
+
myFContext.create(path, ...);
+
...
+
+ Example 3: FileContext with local file system as the default
+
+ If the configuration has the property
+ {@code "fs.$SCHEME.impl.disable.cache"} set to true,
+ a new instance will be created, initialized with the supplied URI and
+ configuration, then returned without being cached.
+
+
+ If there is a cached FS instance matching the same URI, it will
+ be returned.
+
+
+ Otherwise: a new FS instance will be created, initialized with the
+ configuration and URI, cached and returned to the caller.
+
+
+ @throws IOException if the FileSystem cannot be instantiated.]]>
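+
+ A minimal sketch of obtaining a FileSystem instance and of disabling the
+ cache for one scheme so that a private instance can be closed safely; the
+ URI is illustrative:
+
+   Configuration conf = new Configuration();
+   conf.setBoolean("fs.hdfs.impl.disable.cache", true);  // request an uncached instance
+   FileSystem fs = FileSystem.get(URI.create("hdfs://namenode:8020/"), conf);
+   try {
+     // ... use fs ...
+   } finally {
+     fs.close();  // safe: this instance is not shared through the cache
+   }
+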
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ if f == null :
+ result = null
+ elif f.getLen() {@literal <=} start:
+ result = []
+ else result = [ locations(FS, b) for b in blocks(FS, p, s, s+l)]
+
+ This call is most helpful with a distributed filesystem
+ where the hostnames of machines that contain blocks of the given file
+ can be determined.
+
+ The default implementation returns an array containing one element:
+
+
+ If a file is erasure-coded, the returned BlockLocations are logical
+ block groups.
+
+ Suppose we have a RS_3_2 coded file (3 data units and 2 parity units).
+ 1. If the file size is less than one stripe size, say 2 * CELL_SIZE, then
+ there will be one BlockLocation returned, with 0 offset, actual file size
+ and 4 hosts (2 data blocks and 2 parity blocks) hosting the actual blocks.
+ 2. If the file size is less than one group size but greater than one
+ stripe size, then there will be one BlockLocation returned, with 0 offset,
+ actual file size with 5 hosts (3 data blocks and 2 parity blocks) hosting
+ the actual blocks.
+ 3. If the file size is greater than one group size, 3 * BLOCK_SIZE + 123
+ for example, then the result will be like:
+
Fails if the parent of dst does not exist or is a file.
+
+
+ If OVERWRITE option is not passed as an argument, rename fails
+ if the dst already exists.
+
+ If OVERWRITE option is passed as an argument, rename overwrites
+ the dst if it is a file or an empty directory. Rename fails if dst is
+ a non-empty directory.
+
+ Note that atomicity of rename is dependent on the file system
+ implementation. Please refer to the file system documentation for
+ details. This default implementation is non-atomic.
+
+ This method is deprecated since it is a temporary method added to
+ support the transition from FileSystem to FileContext for user
+ applications.
+
+ @param src path to be renamed
+ @param dst new path after rename
+ @throws FileNotFoundException src path does not exist, or the parent
+ path of dst does not exist.
+ @throws FileAlreadyExistsException dest path exists and is a file
+ @throws ParentNotDirectoryException if the parent path of dest is not
+ a directory
+ @throws IOException on failure]]>
+
+
+
+
+
+
+
+
+
Fails if path is a directory.
+
Fails if path does not exist.
+
Fails if path is not closed.
+
Fails if new size is greater than current size.
+
+ @param f The path to the file to be truncated
+ @param newLength The size the file is to be truncated to
+
+ @return true if the file has been truncated to the desired
+ newLength and is immediately available to be reused for
+ write operations such as append, or
+ false if a background process of adjusting the length of
+ the last block has been started, and clients should wait for it to
+ complete before proceeding with further file updates.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default).]]>
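+
+ A minimal sketch of the truncate contract above; it assumes a file system
+ that implements truncate (for example HDFS) and a hypothetical path:
+
+   Path f = new Path("/user/alice/log.bin");   // hypothetical, already closed
+   boolean completed = fs.truncate(f, 1024L);  // keep only the first 1 KB
+   if (!completed) {
+     // The last block is still being adjusted in the background; wait before
+     // appending or otherwise updating the file. The wait strategy is left
+     // to the caller.
+   }
+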
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Clean shutdown of the JVM cannot be guaranteed.
+
The time to shut down a FileSystem depends on the number of
+ files to delete. For filesystems where the cost of checking
+ for the existence of a file/directory and the actual delete operation
+ (for example: object stores) is high, the time to shut down the JVM can be
+ significantly extended by over-use of this feature.
+
Connectivity problems with a remote filesystem may delay shutdown
+ further, and may cause the files to not be deleted.
+
+ @param f the path to delete.
+ @return true if deleteOnExit is successful, otherwise false.
+ @throws IOException IO failure]]>
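+
+ A minimal sketch of marking a scratch path for best-effort deletion on
+ FileSystem close or JVM shutdown; the path is hypothetical:
+
+   Path scratch = new Path("/tmp/job-scratch");   // hypothetical temporary dir
+   fs.mkdirs(scratch);
+   fs.deleteOnExit(scratch);   // best effort only, see the caveats above
+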
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ Will not return null. Expect IOException upon access error.
+ @param f given path
+ @return the statuses of the files/directories in the given path
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param f
+ a path name
+ @param filter
+ the user-supplied path filter
+ @return an array of FileStatus objects for the files under the given path
+ after applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @return a list of statuses for the files under the given paths after
+ applying the filter default Path filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+
+
+
+ Does not guarantee to return the List of files/directories status in a
+ sorted order.
+
+ @param files
+ a list of paths
+ @param filter
+ the user-supplied path filter
+ @return a list of statuses for the files under the given paths after
+ applying the filter
+ @throws FileNotFoundException when the path does not exist
+ @throws IOException see specific implementation]]>
+
+
+
+
+
+
+ Return all the files that match filePattern and are not checksum
+ files. Results are sorted by their names.
+
+
+ A filename pattern is composed of regular characters and
+ special pattern matching characters, which are:
+
+
+
+
+
?
+
Matches any single character.
+
+
+
*
+
Matches zero or more characters.
+
+
+
[abc]
+
Matches a single character from character set
+ {a,b,c}.
+
+
+
[a-b]
+
Matches a single character from the character range
+ {a...b}. Note that character a must be
+ lexicographically less than or equal to character b.
+
+
+
[^a]
+
Matches a single character that is not from character set or range
+ {a}. Note that the ^ character must occur
+ immediately to the right of the opening bracket.
+
+
+
\c
+
Removes (escapes) any special meaning of character c.
+
+
+
{ab,cd}
+
Matches a string from the string set {ab, cd}
+
+
+
{ab,c{de,fh}}
+
Matches a string from the string set {ab, cde, cfh}
+
+
+
+
+
+ @param pathPattern a glob specifying a path pattern
+
+ @return an array of paths that match the path pattern
+ @throws IOException IO failure]]>
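+
+ A minimal sketch of expanding a glob; the pattern is illustrative and the
+ result may be empty (or null when a non-wildcard path does not exist):
+
+   FileStatus[] matches = fs.globStatus(new Path("/data/logs/2024-*/part-[0-9]*"));
+   if (matches != null) {
+     for (FileStatus st : matches) {
+       System.out.println(st.getPath() + " " + st.getLen());
+     }
+   }
+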
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException If an I/O error occurred]]>
+
+
+
+
+
+
+
+
+ f does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+ p does not exist
+ @throws IOException if any I/O error occurred]]>
+
+
+
+
+
+
+
+
+
+ If the path is a directory,
+ if recursive is false, returns files in the directory;
+ if recursive is true, returns files in the subtree rooted at the path.
+ If the path is a file, returns the file's status and block locations.
+
+ @param f is the path
+ @param recursive if the subdirectories need to be traversed recursively
+
+ @return an iterator that traverses statuses of the files
+
+ @throws FileNotFoundException when the path does not exist;
+ @throws IOException see specific implementation]]>
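+
+ A minimal sketch of walking a tree with the remote-iterator form, which
+ avoids materialising every status at once; the path is hypothetical:
+
+   RemoteIterator<LocatedFileStatus> it =
+       fs.listFiles(new Path("/data/input"), true /* recursive */);
+   while (it.hasNext()) {
+     LocatedFileStatus status = it.next();
+     System.out.println(status.getPath() + " blocks=" + status.getBlockLocations().length);
+   }
+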
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ undefined.
+ @throws IOException IO failure]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In some FileSystem implementations such as HDFS, metadata
+ synchronization is essential to guarantee consistency of read requests,
+ particularly in an HA setting.
+ @throws IOException
+ @throws UnsupportedOperationException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to modify
+ @param name xattr name.
+ @param value xattr value.
+ @param flag xattr set flag
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attribute
+ @param name xattr name.
+ @return byte[] xattr value.
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @param names XAttr names.
+ @return Map describing the XAttrs of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to get extended attributes
+ @return List{@literal } of the XAttr names of the file or directory
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+ Refer to the HDFS extended attributes user documentation for details.
+
+ @param path Path to remove extended attribute
+ @param name xattr name
+ @throws IOException IO failure
+ @throws UnsupportedOperationException if the operation is unsupported
+ (default outcome).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is a default method which is intended to be overridden by
+ subclasses. The default implementation returns an empty storage statistics
+ object.
+
+ @return The StorageStatistics for this FileSystem instance.
+ Will never be null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ All user code that may potentially use the Hadoop Distributed
+ File System should be written to use a FileSystem object or its
+ successor, {@link FileContext}.
+
+
+ The local implementation is {@link LocalFileSystem} and the distributed
+ implementation is DistributedFileSystem. There are other implementations
+ for object stores and (outside the Apache Hadoop codebase),
+ third party filesystems.
+
+ Notes
+
+
The behaviour of the filesystem is
+
+ specified in the Hadoop documentation.
+ However, the normative specification of the behavior of this class is
+ actually HDFS: if HDFS does not behave the way these Javadocs or
+ the specification in the Hadoop documentation defines, assume that
+ the documentation is incorrect.
+
+
The term {@code FileSystem} refers to an instance of this class.
+
The acronym "FS" is used as an abbreviation of FileSystem.
+
The term {@code filesystem} refers to the distributed/local filesystem
+ itself, rather than the class used to interact with it.
+
The term "file" refers to a file in the remote filesystem,
+ rather than instances of {@code java.io.File}.
+ Consult the filesystem specification document for the requirements
+ of an implementation of this interface.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Exceptions are caught and downgraded to debug logging.
+ @param source source of statistics.
+ @return a string for logging.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Whenever this object's toString() method is called, it evaluates the
+ statistics.
+
+ This is designed to be affordable to use in log statements.
+ @param source source of statistics -may be null.
+ @return an object whose toString() operation returns the current values.]]>
+
+
+
+
+
+
+ Whenever this object's toString() method is called, it evaluates the
+ statistics.
+
+ This is for use in log statements where the cost of creating
+ this entry is low; it is affordable to use in log statements.
+ @param statistics statistics to stringify -may be null.
+ @return an object whose toString() operation returns the current values.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It is serializable so that frameworks which can use java serialization
+ to propagate data (Spark, Flink...) can send the statistics
+ back. For this reason, TreeMaps are explicitly used as field types,
+ even though IDEs can recommend use of Map instead.
+ For security reasons, untrusted java object streams should never be
+ deserialized. If for some reason this is required, use
+ {@link #requiredSerializationClasses()} to get the list of classes
+ used when deserializing instances of this object.
+
+
+ It is annotated for correct serializations with jackson2.
+
]]>
+
+
+
+
+
+
+
+
+
+ This is not an atomic operation.
+
+ The instance can be serialized, and its
+ {@code toString()} method lists all the values.
+ @param statistics statistics
+ @return a snapshot of the current values.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ It can be used to accrue values so as to dynamically update
+ the mean. If so, know that there is no synchronization
+ on the methods.
+
+
+ If a statistic has 0 samples then it is considered to be empty.
+
+
+ All 'empty' statistics are equivalent, independent of the sum value.
+
+
+ For non-empty statistics, sum and sample values must match
+ for equality.
+
+
+ It is serializable and annotated for correct serializations with jackson2.
+
+
+ Thread safety. The operations to add/copy sample data, are thread safe.
+
+
+
{@link #add(MeanStatistic)}
+
{@link #addSample(long)}
+
{@link #clear()}
+
{@link #setSamplesAndSum(long, long)}
+
{@link #set(MeanStatistic)}
+
{@link #setSamples(long)} and {@link #setSum(long)}
+
+
+ So is the {@link #mean()} method. This ensures that when
+ used to aggregated statistics, the aggregate value and sample
+ count are set and evaluated consistently.
+
+
+ Other methods are marked as synchronized because Findbugs overreacts
+ to the idea that some operations to update sum and sample count
+ are synchronized, but that things like equals are not.
+
+ The name of the constants are uppercase, words separated by
+ underscores.
+
+
+ The value of the constants are lowercase of the constant names.
+
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Since these methods are often vendor- or device-specific, operators
+ may implement this interface in order to achieve fencing.
+
+ Fencing is configured by the operator as an ordered list of methods to
+ attempt. Each method will be tried in turn, and the next in the list
+ will only be attempted if the previous one fails. See {@link NodeFencer}
+ for more information.
+
+ If an implementation also implements {@link Configurable} then its
+ setConf method will be called upon instantiation.]]>
+
StaticUserWebFilter - An authorization plugin that makes all
+users a statically configured user.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ public class IntArrayWritable extends ArrayWritable {
+ public IntArrayWritable() {
+ super(IntWritable.class);
+ }
+ }
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ByteWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to store
+ @param item the object to be stored
+ @param keyName the name of the key to use
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param items the objects to be stored
+ @param keyName the name of the key to use
+ @throws IndexOutOfBoundsException if the items array is empty
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+
+
+
+
+ the class of the item
+ @param conf the configuration to use
+ @param keyName the name of the key to use
+ @param itemClass the class of the item
+ @return restored object
+ @throws IOException : forwards Exceptions from the underlying
+ {@link Serialization} classes.]]>
+
+
+
+
+ DefaultStringifier offers convenience methods to store/load objects to/from
+ the configuration.
+
+ @param the class of the objects to stringify]]>
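+
+ A minimal sketch of storing and restoring a Writable through
+ DefaultStringifier; the key name is hypothetical and the default
+ "io.serializations" setting (WritableSerialization) is assumed:
+
+   Configuration conf = new Configuration();
+   DefaultStringifier.store(conf, new IntWritable(42), "my.app.counter");
+   IntWritable restored =
+       DefaultStringifier.load(conf, "my.app.counter", IntWritable.class);
+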
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a DoubleWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value argument is null or
+ its size is zero, the elementType argument must not be null. If
+ the argument value's size is bigger than zero, the argument
+ elementType is not used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+ value should not be null
+ or empty.
+
+ @param value]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ value and elementType. If the value argument
+ is null or its size is zero, the elementType argument must not be
+ null. If the argument value's size is bigger than zero, the
+ argument elementType is not used.
+
+ @param value
+ @param elementType]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is an EnumSetWritable with the same value,
+ or both are null.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a FloatWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ When two sequence files, which have the same Key type but different Value
+ types, are mapped out to reduce, multiple Value types are not allowed.
+ In this case, this class can help you wrap instances with different types.
+
+
+
+ Compared with ObjectWritable, this class is much more efficient,
+ because ObjectWritable will append the class declaration as a String
+ into the output file in every Key-Value pair.
+
+
+
+ Generic Writable implements {@link Configurable} interface, so that it will be
+ configured by the framework. The configuration is passed to the wrapped objects
+ implementing {@link Configurable} interface before deserialization.
+
+
+ how to use it:
+ 1. Write your own class, such as GenericObject, which extends GenericWritable.
+ 2. Implement the abstract method getTypes(), which defines
+ the classes that will be wrapped in GenericObject in the application.
+ Attention: the classes defined in the getTypes() method must
+ implement the Writable interface.
+
+
+ @since Nov 8, 2006]]>
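+
+ A minimal sketch of the usage steps above: a GenericWritable subclass that
+ declares the concrete Writable types it may wrap (class names are
+ illustrative):
+
+   public class GenericObject extends GenericWritable {
+     // Unchecked array creation, as in the class's own examples.
+     private static final Class<? extends Writable>[] TYPES =
+         new Class[] { IntWritable.class, Text.class };
+
+     @Override
+     protected Class<? extends Writable>[] getTypes() {
+       return TYPES;
+     }
+   }
+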
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a IntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ closes the input and output streams
+ at the end.
+
+ @param in InputStream to read from
+ @param out OutputStream to write to
+ @param conf the Configuration object]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param log the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close
+ @deprecated use {@link #cleanupWithLogger(Logger, java.io.Closeable...)}
+ instead]]>
+
+
+
+
+
+
+ ignore any {@link Throwable} or
+ null pointers. Must only be used for cleanup in exception handlers.
+
+ @param logger the log to record problems to at debug level. Can be null.
+ @param closeables the objects to close]]>
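+
+ A minimal sketch of using cleanupWithLogger in a finally block; LOG is
+ assumed to be an org.slf4j.Logger owned by the caller, and fs/path are
+ assumed to exist:
+
+   FSDataInputStream in = null;
+   try {
+     in = fs.open(path);
+     // ... read from the stream ...
+   } finally {
+     IOUtils.cleanupWithLogger(LOG, in);   // null-safe; close() failures logged at debug
+   }
+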
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is better than File#listDir because it does not ignore IOExceptions.
+
+ @param dir The directory to list.
+ @param filter If non-null, the filter to use when listing
+ this directory.
+ @return The list of files in the directory.
+
+ @throws IOException On I/O error]]>
+
+
+
+
+
+
+
+ Borrowed from Uwe Schindler in LUCENE-5588
+ @param fileToSync the file to fsync]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a LongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ A map is a directory containing two files, the data file,
+ containing all keys and values in the map, and a smaller index
+ file, containing a fraction of the keys. The fraction is determined by
+ {@link Writer#getIndexInterval()}.
+
+
The index file is read entirely into memory. Thus key implementations
+ should try to keep themselves small.
+
+
Map files are created by adding entries in-order. To maintain a large
+ database, perform updates by copying the previous version of a database and
+ merging in a sorted change list, to create a new version of the database in
+ a new file. Sorting large change lists can be done with {@link
+ SequenceFile.Sorter}.]]>
+
SequenceFile provides {@link SequenceFile.Writer},
+ {@link SequenceFile.Reader} and {@link Sorter} classes for writing,
+ reading and sorting respectively.
+
+ There are three SequenceFileWriters based on the
+ {@link CompressionType} used to compress key/value pairs:
+
+
+ Writer : Uncompressed records.
+
+
+ RecordCompressWriter : Record-compressed files, only compress
+ values.
+
+
+ BlockCompressWriter : Block-compressed files, both keys &
+ values are collected in 'blocks'
+ separately and compressed. The size of
+ the 'block' is configurable.
+
+
+
The actual compression algorithm used to compress key and/or values can be
+ specified by using the appropriate {@link CompressionCodec}.
+
+
The recommended way is to use the static createWriter methods
+ provided by the SequenceFile to choose the preferred format.
+
+
The {@link SequenceFile.Reader} acts as the bridge and can read any of the
+ above SequenceFile formats.
+
+
SequenceFile Formats
+
+
Essentially there are 3 different formats for SequenceFiles
+ depending on the CompressionType specified. All of them share a
+ common header described below.
+
+
SequenceFile Header
+
+
+ version - 3 bytes of magic header SEQ, followed by 1 byte of actual
+ version number (e.g. SEQ4 or SEQ6)
+
+
+ keyClassName -key class
+
+
+ valueClassName - value class
+
+
+ compression - A boolean which specifies if compression is turned on for
+ keys/values in this file.
+
+
+ blockCompression - A boolean which specifies if block-compression is
+ turned on for keys/values in this file.
+
+
+ compression codec - CompressionCodec class which is used for
+ compression of keys and/or values (if compression is
+ enabled).
+
+
+ metadata - {@link Metadata} for this file.
+
+
+ sync - A sync marker to denote end of the header.
+
The compressed blocks of key lengths and value lengths consist of the
+ actual lengths of individual keys/values encoded in ZeroCompressedInteger
+ format.
+
+ @see CompressionCodec]]>
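+
+ A minimal sketch of the option-based writer/reader API described above;
+ the path, key and value types are illustrative:
+
+   Path file = new Path("/data/pairs.seq");
+   try (SequenceFile.Writer writer = SequenceFile.createWriter(conf,
+       SequenceFile.Writer.file(file),
+       SequenceFile.Writer.keyClass(Text.class),
+       SequenceFile.Writer.valueClass(IntWritable.class),
+       SequenceFile.Writer.compression(SequenceFile.CompressionType.RECORD))) {
+     writer.append(new Text("alpha"), new IntWritable(1));
+   }
+   try (SequenceFile.Reader reader =
+       new SequenceFile.Reader(conf, SequenceFile.Reader.file(file))) {
+     Text key = new Text();
+     IntWritable value = new IntWritable();
+     while (reader.next(key, value)) {
+       System.out.println(key + " -> " + value);
+     }
+   }
+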
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a ShortWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ the class of the objects to stringify]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ position. Note that this
+ method avoids using the converter or doing String instantiation
+ @return the Unicode scalar value at position or -1
+ if the position is invalid or points to a
+ trailing byte]]>
+
+
+
+
+
+
+
+
+
+ what in the backing
+ buffer, starting at position start. The starting
+ position is measured in bytes and the return value is in
+ terms of byte position in the buffer. The backing buffer is
+ not converted to a string for this operation.
+ @return byte position of the first occurrence of the search
+ string in the UTF-8 buffer or -1 if not found]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Note: For performance reasons, this call does not clear the
+ underlying byte array that is retrievable via {@link #getBytes()}.
+ In order to free the byte-array memory, call {@link #set(byte[])}
+ with an empty byte array (For example, new byte[0]).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a Text with the same contents.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ replace is true, then
+ malformed input is replaced with the
+ substitution character, which is U+FFFD. Otherwise the
+ method throws a MalformedInputException.
+ @return ByteBuffer: bytes stores at ByteBuffer.array()
+ and length is ByteBuffer.limit()]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ In
+ addition, it provides methods for string traversal without converting the
+ byte array to a string.
Also includes utilities for
+ serializing/deserializing a string, coding/decoding a string, checking if a
+ byte array contains valid UTF-8 code, and calculating the length of an encoded
+ string.]]>
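+
+ A minimal sketch of the byte-level operations described above; the sample
+ string is arbitrary:
+
+   Text t = new Text("hello, wörld");
+   int len = t.getLength();      // length in bytes of the UTF-8 encoding
+   int cp  = t.charAt(0);        // Unicode code point at a byte offset, or -1
+   int pos = t.find("wörld");    // byte offset of the match, or -1 if absent
+   byte[] raw = t.copyBytes();   // copy of exactly getLength() bytes
+   t.set(new byte[0]);           // releases the old contents, as noted earlier
+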
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This is useful when a class may evolve, so that instances written by the
+ old version of the class may still be processed by the new version. To
+ handle this situation, {@link #readFields(DataInput)}
+ implementations should catch {@link VersionMismatchException}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VIntWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ o is a VLongWritable with the same value.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ out.
+
+ @param out DataOuput to serialize this object into.
+ @throws IOException]]>
+
+
+
+
+
+
+ in.
+
+
For efficiency, implementations should attempt to re-use storage in the
+ existing object where possible.
+
+ @param in DataInput to deserialize this object from.
+ @throws IOException]]>
+
+
+
+ Any key or value type in the Hadoop Map-Reduce
+ framework implements this interface.
+
+
Implementations typically implement a static read(DataInput)
+ method which constructs a new instance, calls {@link #readFields(DataInput)}
+ and returns the instance.
+
+
Example:
+
+ public class MyWritable implements Writable {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ // Default constructor to allow (de)serialization
+ MyWritable() { }
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public static MyWritable read(DataInput in) throws IOException {
+ MyWritable w = new MyWritable();
+ w.readFields(in);
+ return w;
+ }
+ }
+
]]>
+
+
+
+
+
+
+
+
+ WritableComparables can be compared to each other, typically
+ via Comparators. Any type which is to be used as a
+ key in the Hadoop Map-Reduce framework should implement this
+ interface.
+
+
Note that hashCode() is frequently used in Hadoop to partition
+ keys. It's important that your implementation of hashCode() returns the same
+ result across different instances of the JVM. Note also that the default
+ hashCode() implementation in Object does not
+ satisfy this property.
+
+
Example:
+
+ public class MyWritableComparable implements
+ WritableComparable{@literal } {
+ // Some data
+ private int counter;
+ private long timestamp;
+
+ public void write(DataOutput out) throws IOException {
+ out.writeInt(counter);
+ out.writeLong(timestamp);
+ }
+
+ public void readFields(DataInput in) throws IOException {
+ counter = in.readInt();
+ timestamp = in.readLong();
+ }
+
+ public int compareTo(MyWritableComparable o) {
+ int thisValue = this.value;
+ int thatValue = o.value;
+ return (thisValue < thatValue ? -1 : (thisValue==thatValue ? 0 : 1));
+ }
+
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + counter;
+ result = prime * result + (int) (timestamp ^ (timestamp >>> 32));
+ return result;
+ }
+ }
+
One may optimize compare-intensive operations by overriding
+ {@link #compare(byte[],int,int,byte[],int,int)}. Static utility methods are
+ provided to assist in optimized implementations of this method.]]>
+
+ The code alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec object]]>
+
+
+
+
+
+
+ Codec aliases are case insensitive.
+
+ The code alias is the short class name (without the package name).
+ If the short class name ends with 'Codec', then there are two aliases for
+ the codec, the complete short class name and the short class name without
+ the 'Codec' ending. For example for the 'GzipCodec' codec class name the
+ aliases are 'gzip' and 'gzipcodec'.
+
+ @param codecName the canonical class name of the codec
+ @return the codec class]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Implementations are assumed to be buffered. This permits clients to
+ reposition the underlying input stream then call {@link #resetState()},
+ without having to also synchronize client buffers.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ #setInput() should be called in order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true if the end of the compressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true indicating that more input data is required.
+ (Both native and non-native versions of various Decompressors require
+ that the data passed in via b[] remain unmodified until
+ the caller is explicitly notified--via {@link #needsInput()}--that the
+ buffer may be safely modified. With this requirement, an extra
+ buffer-copy can be avoided.)
+
+ @param b Input data
+ @param off Start offset
+ @param len Length]]>
+
+
+
+
+ true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called to
+ provide more input.
+
+ @return true if the input data buffer is empty and
+ {@link #setInput(byte[], int, int)} should be called in
+ order to provide more input.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ true if a preset dictionary is needed for decompression.
+ @return true if a preset dictionary is needed for decompression]]>
+
+
+
+
+ true if the end of the decompressed
+ data output stream has been reached. Indicates a concatenated data stream
+ when finished() returns true and {@link #getRemaining()}
+ returns a positive value. finished() will be reset with the
+ {@link #reset()} method.
+ @return true if the end of the decompressed
+ data output stream has been reached.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ true and getRemaining() returns a positive value. If
+ {@link #finished()} returns true and getRemaining() returns
+ a zero value, indicates that the end of data stream has been reached and
+ is not a concatenated data stream.
+ @return The number of bytes remaining in the compressed data buffer.]]>
+
+
+
+
+ true and {@link #getRemaining()} returns a positive value,
+ reset() is called before processing of the next data stream in the
+ concatenated data stream. {@link #finished()} will be reset and will
+ return false when reset() is called.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ io.compression.codecs = org.apache.hadoop.io.compress.PassthroughCodec
+ io.compress.passthrough.extension = .gz
+
+
+ Note: this is not a Splittable codec: it doesn't know the
+ capabilities of the passed in stream. It should be possible to
+ extend this in a subclass: the inner classes are marked as protected
+ to enable this. Do not retrofit splitting to this class.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
Seek by key or by file offset.
+
+ The memory footprint of a TFile includes the following:
+
+
Some constant overhead of reading or writing a compressed block.
+
+
Each compressed block requires one compression/decompression codec for
+ I/O.
+
Temporary space to buffer the key.
+
Temporary space to buffer the value (for TFile.Writer only). Values are
+ chunk encoded, so that we buffer at most one chunk of user data. By default,
+ the chunk buffer is 1MB. Reading chunked value does not require additional
+ memory.
+
+
TFile index, which is proportional to the total number of Data Blocks.
+ The total amount of memory needed to hold the index can be estimated as
+ (56+AvgKeySize)*NumBlocks.
+
MetaBlock index, which is proportional to the total number of Meta
+ Blocks.The total amount of memory needed to hold the index for Meta Blocks
+ can be estimated as (40+AvgMetaBlockName)*NumMetaBlock.
+
+
+ The behavior of TFile can be customized by the following variables through
+ Configuration:
+
+
tfile.io.chunk.size: Value chunk size. Integer (in bytes). Default
+ to 1MB. Values with a length less than the chunk size are guaranteed to have
+ a known value length at read time (See
+ {@link TFile.Reader.Scanner.Entry#isValueLengthKnown()}).
+
tfile.fs.output.buffer.size: Buffer size used for
+ FSDataOutputStream. Integer (in bytes). Default to 256KB.
+
tfile.fs.input.buffer.size: Buffer size used for
+ FSDataInputStream. Integer (in bytes). Default to 256KB.
+
+
+ Suggestions on performance optimization.
+
+
Minimum block size. We recommend a setting of minimum block size between
+ 256KB and 1MB for general usage. Larger block size is preferred if files are
+ primarily for sequential access. However, it would lead to inefficient random
+ access (because there are more data to decompress). Smaller blocks are good
+ for random access, but require more memory to hold the block index, and may
+ be slower to create (because we must flush the compressor stream at the
+ conclusion of each data block, which leads to an FS I/O flush). Further, due
+ to the internal caching in Compression codec, the smallest possible block
+ size would be around 20KB-30KB.
+
The current implementation does not offer true multi-threading for
+ reading. The implementation uses FSDataInputStream seek()+read(), which is
+ shown to be much faster than positioned-read call in single thread mode.
+ However, it also means that if multiple threads attempt to access the same
+ TFile (using multiple scanners) simultaneously, the actual I/O is carried out
+ sequentially even if they access different DFS blocks.
+
Compression codec. Use "none" if the data is not very compressable (by
+ compressable, I mean a compression ratio at least 2:1). Generally, use "lzo"
+ as the starting point for experimenting. "gz" overs slightly better
+ compression ratio over "lzo" but requires 4x CPU to compress and 2x CPU to
+ decompress, comparing to "lzo".
+
File system buffering, if the underlying FSDataInputStream and
+ FSDataOutputStream are already adequately buffered, or if applications
+ read/write keys and values in large buffers, we can reduce the sizes of
+ input/output buffering in TFile layer by setting the configuration parameters
+ "tfile.fs.input.buffer.size" and "tfile.fs.output.buffer.size".
+
+
+ Some design rationale behind TFile can be found at Hadoop-3315.]]>
+
+
+
+
+
+
+
+
+
+
+ Utils#writeVLong(out, n).
+
+ @param out
+ output stream
+ @param n
+ The integer to be encoded
+ @throws IOException
+ @see Utils#writeVLong(DataOutput, long)]]>
+
+
+
+
+
+
+
+
+
if n in [-32, 127): encode in one byte with the actual value.
+ Otherwise,
+
if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52;
+ byte[1]=n&0xff. Otherwise,
+
+ if n in [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 -
+ 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
+
if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112;
+ byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff;
+ byte[3]=n&0xff.
+ Otherwise:
+
if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] =
+ (n>>24)&0xff; byte[2]=(n>>16)&0xff;
+ byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
+
if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] =
+ (n>>32)&0xff; byte[2]=(n>>24)&0xff;
+ byte[3]=(n>>16)&0xff; byte[4]=(n>>8)&0xff;
+ byte[5]=n&0xff
+
if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] =
+ (n>>40)&0xff; byte[2]=(n>>32)&0xff;
+ byte[3]=(n>>24)&0xff; byte[4]=(n>>16)&0xff;
+ byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
+
if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] =
+ (n>>48)&0xff; byte[2] = (n>>40)&0xff;
+ byte[3]=(n>>32)&0xff; byte[4]=(n>>24)&0xff; byte[5]=
+ (n>>16)&0xff; byte[6]=(n>>8)&0xff; byte[7]=n&0xff;
+
if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] =
+ (n>>54)&0xff; byte[2] = (n>>48)&0xff;
+ byte[3] = (n>>40)&0xff; byte[4]=(n>>32)&0xff;
+ byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff; byte[7]=
+ (n>>8)&0xff; byte[8]=n&0xff;
+
+
+ @param out
+ output stream
+ @param n
+ the integer number
+ @throws IOException]]>
+
if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
+
if (FB in [-104, -73]), return (FB+88)<<16 +
+ (NB[0]&0xff)<<8 + NB[1]&0xff;
+
if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)
+ <<16 + (NB[1]&0xff)<<8 + NB[2]&0xff;
+
if (FB in [-128, -121]), return interpret NB[FB+129] as a signed
+ big-endian integer.
+
+ @param in
+ input stream
+ @return the decoded long integer.
+ @throws IOException]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @param cmp
+ Comparator for the key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+ Type of the input key.
+ @param list
+ The list
+ @param key
+ The input key.
+ @return The index to the desired element if it exists; or list.size()
+ otherwise.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ An experimental {@link Serialization} for Java {@link Serializable} classes.
+
+ @see JavaSerializationComparator]]>
+
+
+
+
+
+
+
+
+
+
+ A {@link RawComparator} that uses a {@link JavaSerialization}
+ {@link Deserializer} to deserialize objects that are then compared via
+ their {@link Comparable} interfaces.
+
+ @param
+ @see JavaSerialization]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides a mechanism for using different serialization frameworks
+in Hadoop. The property "io.serializations" defines a list of
+{@link org.apache.hadoop.io.serializer.Serialization}s that know how to create
+{@link org.apache.hadoop.io.serializer.Serializer}s and
+{@link org.apache.hadoop.io.serializer.Deserializer}s.
+
+
+
+To add a new serialization framework write an implementation of
+{@link org.apache.hadoop.io.serializer.Serialization} and add its name to the
+"io.serializations" property.
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ avro.reflect.pkgs or implement
+ {@link AvroReflectSerializable} interface.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+This package provides Avro serialization in Hadoop. This can be used to
+serialize/deserialize Avro types in Hadoop.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroSpecificSerialization} for
+serialization of classes generated by Avro's 'specific' compiler.
+
+
+
+Use {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} for
+other classes.
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization} works for
+any class which is either in the package list configured via
+{@link org.apache.hadoop.io.serializer.avro.AvroReflectSerialization#AVRO_REFLECT_PACKAGES}
+or implements the {@link org.apache.hadoop.io.serializer.avro.AvroReflectSerializable}
+interface.
+
{@link MetricsSource} generates and updates metrics information.
+
{@link MetricsSink} consumes the metrics information
+
+
+ {@link MetricsSource} and {@link MetricsSink} register with the metrics
+ system. Implementations of {@link MetricsSystem} poll the
+ {@link MetricsSource}s periodically and pass the {@link MetricsRecord}s to
+ {@link MetricsSink}.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ } (aggregate).
+ Filter out entries that don't have at least minSamples.
+
+ @return a map of peer DataNode Id to the average latency to that
+ node seen over the measurement period.]]>
+
+
+
+
+
+
+
+
+
+
+ This class maintains a group of rolling average metrics. It implements the
+ algorithm of rolling average, i.e. a number of sliding windows are kept to
+ roll over and evict old subsets of samples. Each window has a subset of
+ samples in a stream, where sub-sum and sub-total are collected. All sub-sums
+ and sub-totals in all windows will be aggregated into a final sum and final total,
+ which are used to compute the final average, called the rolling average.
+ ]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This class is a metrics sink that uses
+ {@link org.apache.hadoop.fs.FileSystem} to write the metrics logs. Every
+ roll interval a new directory will be created under the path specified by the
+ basepath property. All metrics will be logged to a file in the
+ current interval's directory in a file named <hostname>.log, where
+ <hostname> is the name of the host on which the metrics logging
+ process is running. The base path is set by the
+ <prefix>.sink.<instance>.basepath property. The
+ time zone used to create the current interval's directory name is GMT. If
+ the basepath property isn't specified, it will default to
+ "/tmp", which is the temp directory on whatever default file
+ system is configured for the cluster.
+
+
The <prefix>.sink.<instance>.ignore-error
+ property controls whether an exception is thrown when an error is encountered
+ writing a log file. The default value is true. When set to
+ false, file errors are quietly swallowed.
+
+
The roll-interval property sets the amount of time before
+ rolling the directory. The default value is 1 hour. The roll interval may
+ not be less than 1 minute. The property's value should be given as
+ number unit, where number is an integer value, and
+ unit is a valid unit. Valid units are minute, hour,
+ and day. The units are case insensitive and may be abbreviated or
+ plural. If no units are specified, hours are assumed. For example,
+ "2", "2h", "2 hour", and
+ "2 hours" are all valid ways to specify two hours.
+
+
The roll-offset-interval-millis property sets the upper
+ bound on a random time interval (in milliseconds) that is used to delay
+ before the initial roll. All subsequent rolls will happen an integer
+ number of roll intervals after the initial roll, hence retaining the original
+ offset. The purpose of this property is to insert some variance in the roll
+ times so that large clusters using this sink on every node don't cause a
+ performance impact on HDFS by rolling simultaneously. The default value is
+ 30000 (30s). When writing to HDFS, as a rule of thumb, the roll offset in
+ millis should be no less than the number of sink instances times 5.
+
+
The primary use of this class is for logging to HDFS. As it uses
+ {@link org.apache.hadoop.fs.FileSystem} to access the target file system,
+ however, it can be used to write to the local file system, Amazon S3, or any
+ other supported file system. The base path for the sink will determine the
+ file system used. An unqualified path will write to the default file system
+ set by the configuration.
+
+
Not all file systems support the ability to append to files. In file
+ systems without the ability to append to files, only one writer can write to
+ a file at a time. To allow for concurrent writes from multiple daemons on a
+ single host, the source property is used to set unique headers
+ for the log files. The property should be set to the name of
+ the source daemon, e.g. namenode. The value of the
+ source property should typically be the same as the property's
+ prefix. If this property is not set, the source is taken to be
+ unknown.
+
+
Instead of appending to an existing file, by default the sink
+ will create a new file with a suffix of ".<n>", where
+ n is the next lowest integer that isn't already used in a file name,
+ similar to the Hadoop daemon logs. NOTE: the file with the highest
+ sequence number is the newest file, unlike the Hadoop daemon logs.
+
+
For file systems that allow append, the sink supports appending to the
+ existing file instead. If the allow-append property is set to
+ true, the sink will instead append to the existing file on file systems that
+ support appends. By default, the allow-append property is
+ false.
+
+
Note that when writing to HDFS with allow-append set to true,
+ there is a minimum acceptable number of data nodes. If the number of data
+ nodes drops below that minimum, the append will succeed, but reading the
+ data will fail with an IOException in the DataStreamer class. The minimum
+ number of data nodes required for a successful append is generally 2 or
+ 3.
+
+
Note also that when writing to HDFS, the file size information is not
+ updated until the file is closed (at the end of the interval) even though
+ the data is being written successfully. This is a known HDFS limitation that
+ exists because of the performance cost of updating the metadata. See
+ HDFS-5478.
+
+
When using this sink in a secure (Kerberos) environment, two additional
+ properties must be set: keytab-key and
+ principal-key. keytab-key should contain the key by
+ which the keytab file can be found in the configuration, for example,
+ yarn.nodemanager.keytab. principal-key should
+ contain the key by which the principal can be found in the configuration,
+ for example, yarn.nodemanager.principal.]]>
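+
+  As a hedged configuration sketch in the same style as the StatsD example
+  later in this file: the instance name "hdfslog" and the HDFS base path are
+  assumptions for illustration, the sink class name is believed to be
+  org.apache.hadoop.metrics2.sink.RollingFileSystemSink, and only properties
+  described above are used.
+
+    nodemanager.sink.hdfslog.class=org.apache.hadoop.metrics2.sink.RollingFileSystemSink
+    nodemanager.sink.hdfslog.basepath=hdfs:///metrics/nodemanager
+    nodemanager.sink.hdfslog.source=nodemanager
+    nodemanager.sink.hdfslog.roll-interval=1 hour
+    nodemanager.sink.hdfslog.roll-offset-interval-millis=30000
+    nodemanager.sink.hdfslog.allow-append=false
+    # Only needed on secure (Kerberos) clusters:
+    nodemanager.sink.hdfslog.keytab-key=yarn.nodemanager.keytab
+    nodemanager.sink.hdfslog.principal-key=yarn.nodemanager.principal
+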
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CollectD StatsD plugin).
+
+ To configure this plugin, you will need to add the following
+ entries to your hadoop-metrics2.properties file:
+
+
+ *.sink.statsd.class=org.apache.hadoop.metrics2.sink.StatsDSink
+ [prefix].sink.statsd.server.host=
+ [prefix].sink.statsd.server.port=
+ [prefix].sink.statsd.skip.hostname=true|false (optional)
+ [prefix].sink.statsd.service.name=NameNode (name you want for service)
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ,name=}"
+ Where the {@literal and } are the supplied
+ parameters.
+
+ @param serviceName
+ @param nameName
+ @param theMbean - the MBean to register
+ @return the name used to register the MBean]]>
+
+
+
+
+
+
+
+
+ ,name=}"
+ Where the {@literal and } are the supplied
+ parameters.
+
+ @param serviceName
+ @param nameName
+ @param properties - Key value pairs to define additional JMX ObjectName
+ properties.
+ @param theMbean - the MBean to register
+ @return the name used to register the MBean]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ hostname or hostname:port. If
+ the specs string is null, defaults to localhost:defaultPort.
+
+ @param specs server specs (see description)
+ @param defaultPort the default port if not specified
+ @return a list of InetSocketAddress objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is used when parts of Hadoop need to know whether to apply
+ single rack vs multi-rack policies, such as during block placement.
+ Such algorithms behave differently if they are on multi-switch systems.
+
+
+ @return true if the mapping thinks that it is on a single switch]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This predicate simply assumes that all mappings not derived from
+ this class are multi-switch.
+ @param mapping the mapping to query
+ @return true if the base class says it is single switch, or the mapping
+ is not derived from this class.]]>
+
+
+
+ It is not mandatory to
+ derive {@link DNSToSwitchMapping} implementations from it, but it is strongly
+ recommended, as it makes it easy for the Hadoop developers to add new methods
+ to this base class that are automatically picked up by all implementations.
+
+
+ This class does not extend the Configured
+ base class, and should not be changed to do so, as it causes problems
+ for subclasses. The constructor of the Configured calls
+ the {@link #setConf(Configuration)} method, which will call into the
+ subclasses before they have been fully constructed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ If a name cannot be resolved to a rack, the implementation
+ should return {@link NetworkTopology#DEFAULT_RACK}. This
+ is what the bundled implementations do, though it is not a formal requirement.
+
+ @param names the list of hosts to resolve (can be empty)
+ @return list of resolved network paths.
+ If names is empty, the returned list is also empty]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Calling {@link #setConf(Configuration)} will trigger a
+ re-evaluation of the configuration settings and so be used to
+ set up the mapping script.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will get called in the superclass constructor, so a check is needed
+ to ensure that the raw mapping is defined before trying to relay a null
+ configuration.
+ @param conf]]>
+
+
+
+
+
+
+
+
+
+ It contains a static class RawScriptBasedMapping that performs
+ the work: reading the configuration parameters, executing any defined
+ script, handling errors and such like. The outer
+ class extends {@link CachedDNSToSwitchMapping} to cache the delegated
+ queries.
+
+ This DNS mapper's {@link #isSingleSwitch()} predicate returns
+ true if and only if a script is defined.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Simple {@link DNSToSwitchMapping} implementation that reads a 2 column text
+ file. The columns are separated by whitespace. The first column is a DNS or
+ IP address and the second column specifies the rack where the address maps.
+
+
+ This class uses the configuration parameter {@code
+ net.topology.table.file.name} to locate the mapping file.
+
+
+ Calls to {@link #resolve(List)} will look up the address as defined in the
+ mapping file. If no entry corresponding to the address is found, the value
+ {@code /default-rack} is returned.
+
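+ As a hedged sketch (the file path, host names and racks are hypothetical;
+ the net.topology.node.switch.mapping.impl key is believed to be the standard
+ way to select the mapping implementation), a two-column table file and the
+ matching configuration could look like:
+
+   # /etc/hadoop/topology.table : DNS name or IP address, then rack
+   host1.example.com   /rack1
+   host2.example.com   /rack1
+   192.168.0.12        /rack2
+
+   # configuration (shown as key=value for brevity)
+   net.topology.node.switch.mapping.impl=org.apache.hadoop.net.TableMapping
+   net.topology.table.file.name=/etc/hadoop/topology.table
+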
+ An instance of the default {@link DelegationTokenAuthenticator} will be
+ used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL using the default
+ {@link DelegationTokenAuthenticator} class.
+
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+ DelegationTokenAuthenticatedURL.
+
+ @param authenticator the {@link DelegationTokenAuthenticator} instance to
+ use, if null the default one will be used.
+ @param connConfigurator a connection configurator.]]>
+
+
+
+
+
+
+
+
+
+
+
+ The default class is {@link KerberosDelegationTokenAuthenticator}
+
+ @return the delegation token authenticator class to use as default.]]>
+
+
+
+
+
+
+ This method is provided to enable WebHDFS backwards compatibility.
+
+ @param useQueryString TRUE if the token is transmitted in the
+ URL query string, FALSE if the delegation token is transmitted
+ using the {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP
+ header.]]>
+
+
+
+
+ TRUE if the token is transmitted in the URL query
+ string, FALSE if the delegation token is transmitted using the
+ {@link DelegationTokenAuthenticator#DELEGATION_TOKEN_HEADER} HTTP header.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator. If the doAs parameter is not NULL,
+ the request will be done on behalf of the specified doAs user.
+
+ @param url the URL to connect to. Only HTTP/S URLs are supported.
+ @param token the authentication token being used for the user.
+ @param doAs user to do the request on behalf of, if NULL the request is
+ as self.
+ @return an authenticated {@link HttpURLConnection}.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @return a delegation token.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+ DelegationTokenAuthenticatedURL is a
+ {@link AuthenticatedURL} sub-class with built-in Hadoop Delegation Token
+ functionality.
+
+ The authentication mechanisms supported by default are Hadoop Simple
+ authentication (also known as pseudo authentication) and Kerberos SPNEGO
+ authentication.
+
+ Additional authentication mechanisms can be supported via {@link
+ DelegationTokenAuthenticator} implementations.
+
+ The default {@link DelegationTokenAuthenticator} is the {@link
+ KerberosDelegationTokenAuthenticator} class which supports
+ automatic fallback from Kerberos SPNEGO to Hadoop Simple authentication via
+ the {@link PseudoDelegationTokenAuthenticator} class.
+
+ AuthenticatedURL instances are not thread-safe.]]>
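+
+ A minimal usage sketch, assuming a reachable HTTP endpoint; the URL and the
+ renewer name below are illustrative, not part of the API:
+
+   import java.net.HttpURLConnection;
+   import java.net.URL;
+   import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticatedURL;
+
+   public class DtUrlExample {
+     public static void main(String[] args) throws Exception {
+       URL url = new URL("http://host:14000/webhdfs/v1/?op=GETHOMEDIRECTORY"); // hypothetical
+       DelegationTokenAuthenticatedURL.Token token =
+           new DelegationTokenAuthenticatedURL.Token();     // holds the auth/delegation tokens
+       DelegationTokenAuthenticatedURL authUrl = new DelegationTokenAuthenticatedURL();
+
+       // Authenticate (Kerberos SPNEGO, falling back to simple auth) and connect.
+       HttpURLConnection conn = authUrl.openConnection(url, token);
+       System.out.println("HTTP status: " + conn.getResponseCode());
+
+       // Fetch a delegation token for later use; it is stored in the Token instance.
+       authUrl.getDelegationToken(url, token, "renewer-user");
+     }
+   }
+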
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator
+ for authentication.
+
+ @param url the URL to get the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token being used for the user where the
+ Delegation token will be stored.
+ @param renewer the renewer user.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+
+
+ Authenticator for authentication.
+
+ @param url the URL to renew the delegation token from. Only HTTP/S URLs are
+ supported.
+ @param token the authentication token with the Delegation Token to renew.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.
+ @throws AuthenticationException if an authentication exception occurred.]]>
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+ Authenticator.
+
+ @param url the URL to cancel the delegation token from. Only HTTP/S URLs
+ are supported.
+ @param token the authentication token with the Delegation Token to cancel.
+ @param doAsUser the user to do as, which will be the token owner.
+ @throws IOException if an IO error occurred.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ KerberosDelegationTokenAuthenticator provides support for
+ Kerberos SPNEGO authentication mechanism and support for Hadoop Delegation
+ Token operations.
+
+ It falls back to the {@link PseudoDelegationTokenAuthenticator} if the HTTP
+ endpoint does not trigger a SPNEGO authentication]]>
+
+
+
+
+
+
+
+
+ PseudoDelegationTokenAuthenticator provides support for
+ Hadoop's pseudo authentication mechanism that accepts
+ the user name specified as a query string parameter and support for Hadoop
+ Delegation Token operations.
+
+ This mimics the model of Hadoop Simple authentication trusting the
+ {@link UserGroupInformation#getCurrentUser()} value.]]>
+
Any long-lived operation here will prevent the service state
+ change from completing in a timely manner.
+
If another thread is somehow invoked from the listener, and
+ that thread invokes the methods of the service (including
+ subclass-specific methods), there is a risk of a deadlock.
+
+
+
+ @param service the service that has changed.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The base implementation logs all arguments at the debug level,
+ then returns the passed in config unchanged.]]>
+
+
+
+
+
+
+ The action is to signal success by returning the exit code 0.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This method is called before {@link #init(Configuration)};
+ Any non-null configuration that is returned from this operation
+ becomes the one that is passed on to that {@link #init(Configuration)}
+ operation.
+
+ This permits implementations to change the configuration before
+ the init operation. As the ServiceLauncher only creates
+ an instance of the base {@link Configuration} class, it is
+ recommended to instantiate any subclass (such as YarnConfiguration)
+ that injects new resources.
+
+ @param config the initial configuration build up by the
+ service launcher.
+ @param args list of arguments passed to the command line
+ after any launcher-specific commands have been stripped.
+ @return the configuration to init the service with.
+ Recommended: pass down the config parameter with any changes
+ @throws Exception any problem]]>
+
+
+
+
+
+
+ The return value becomes the exit code of the launched process.
+
+ If an exception is raised, the policy is:
+
+
Any subclass of {@link org.apache.hadoop.util.ExitUtil.ExitException}:
+ the exception is passed up unmodified.
+
+
Any exception which implements
+ {@link org.apache.hadoop.util.ExitCodeProvider}:
+ A new {@link ServiceLaunchException} is created with the exit code
+ and message of the thrown exception; the thrown exception becomes the
+ cause.
+
Any other exception: a new {@link ServiceLaunchException} is created
+ with the exit code {@link LauncherExitCodes#EXIT_EXCEPTION_THROWN} and
+ the message of the original exception (which becomes the cause).
+
+ @return the exit code
+ @throws org.apache.hadoop.util.ExitUtil.ExitException an exception passed
+ up as the exit code and error text.
+ @throws Exception any exception to report. If it provides an exit code
+ this is used in a wrapping exception.]]>
+
+
+
+
+ The command line options will be passed down before the
+ {@link Service#init(Configuration)} operation is invoked via an
+ invocation of {@link LaunchableService#bindArgs(Configuration, List)}
+ After the service has been successfully started via {@link Service#start()}
+ the {@link LaunchableService#execute()} method is called to execute the
+ service. When this method returns, the service launcher will exit, using
+ the return code from the method as its exit code.]]>
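+
+ A minimal sketch of a launchable service, assuming the convenience base class
+ AbstractLaunchableService and the ServiceLauncher.serviceMain entry point are
+ available; the class name and behaviour are invented for illustration:
+
+   import java.util.List;
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.service.launcher.AbstractLaunchableService;
+   import org.apache.hadoop.service.launcher.ServiceLauncher;
+
+   public class DemoLaunchableService extends AbstractLaunchableService {
+     public DemoLaunchableService() {
+       super("DemoLaunchableService");
+     }
+
+     @Override
+     public Configuration bindArgs(Configuration config, List<String> args) throws Exception {
+       // Called before init(); a non-null return value becomes the config passed to init().
+       return config;
+     }
+
+     @Override
+     public int execute() throws Exception {
+       // Invoked after start(); the return value becomes the process exit code.
+       return 0;
+     }
+
+     public static void main(String[] args) {
+       // Hypothetical invocation: the launcher instantiates the named class and runs it.
+       ServiceLauncher.serviceMain(DemoLaunchableService.class.getName());
+     }
+   }
+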
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 400 Bad Request}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 401 Unauthorized}]]>
+
+
+
+
+
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 403: Forbidden}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 404: Not Found}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 405: Not allowed}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 406: Not Acceptable}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 408: Request Timeout}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 409: Conflict}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 500 Internal Server Error}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 501: Not Implemented}]]>
+
+
+
+
+
+ Approximate HTTP equivalent: {@code 503 Service Unavailable}]]>
+
+
+
+
+
+ If raised, this is expected to be raised server-side and likely due
+ to client/server version incompatibilities.
+
+ Approximate HTTP equivalent: {@code 505: Version Not Supported}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Codes with a YARN prefix are YARN-related.
+
+ Many of the exit codes are designed to resemble HTTP error codes,
+ squashed into a single byte; e.g. 44, "not found", is the equivalent
+ of 404. The various 2XX HTTP error codes aren't followed;
+ the Unix standard of "0" for success is used.
+
+ 0-10: general command issues
+ 30-39: equivalent to the 3XX responses, where those responses are
+ considered errors by the application.
+ 40-49: client-side/CLI/config problems
+ 50-59: service-side problems.
+ 60+ : application specific error codes
+
]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This uses {@link String#format(String, Object...)}
+ to build the formatted exception in the ENGLISH locale.
+
+ If the last argument is a throwable, it becomes the cause of the exception.
+ It will also be used as a parameter for the format.
+ @param exitCode exit code
+ @param format format for message to use in exception
+ @param args list of arguments]]>
+
+
+
+
+
+ This uses {@link String#format(String, Object...)}
+ to build the formatted exception in the ENGLISH locale.
+ @param exitCode exit code
+ @param cause inner cause
+ @param format format for message to use in exception
+ @param args list of arguments]]>
+
+
+
+
+ When caught by the ServiceLauncher, it will convert that
+ into a process exit code.
+
+ The {@link #ServiceLaunchException(int, String, Object...)} constructor
+ generates formatted exceptions.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ This will be 0 until a call
+ to {@link #finished()} has been made.
+ @return the currently recorded duration.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Clients and/or applications can use the provided Progressable
+ to explicitly report progress to the Hadoop framework. This is especially
+ important for operations which take significant amount of time since,
+ important for operations which take a significant amount of time since,
+ has occurred and time-out the operation.]]>
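+
+ A small sketch; the file path is illustrative. The Progressable is passed to
+ FileSystem#create so the framework sees liveness during a long write:
+
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.fs.FSDataOutputStream;
+   import org.apache.hadoop.fs.FileSystem;
+   import org.apache.hadoop.fs.Path;
+   import org.apache.hadoop.util.Progressable;
+
+   public class ProgressExample {
+     public static void main(String[] args) throws Exception {
+       FileSystem fs = FileSystem.get(new Configuration());
+       // Report progress whenever the framework asks, keeping the operation alive.
+       FSDataOutputStream out = fs.create(new Path("/tmp/progress-demo"),
+           (Progressable) () -> System.out.println("progress reported"));
+       out.close();
+     }
+   }
+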
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Class is to be obtained
+ @return the correctly typed Class of the given object.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ kill -0 command or equivalent]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param parent File parent directory
+ @param basename String script file basename
+ @return File referencing the script in the directory]]>
+
+
+
+
+
+ ".cmd" on Windows, or ".sh" otherwise.
+
+ @param basename String script file basename
+ @return String script file name]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ IOException.
+ @return the path to {@link #WINUTILS_EXE}
+ @throws RuntimeException if the path is not resolvable]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell.
+ @return the thread that ran runCommand() that spawned this shell
+ or null if no thread is waiting for this shell to complete]]>
+
+
+
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param cmd shell command to execute.
+ @return the output of the executed command.]]>
+
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @param timeout time in milliseconds after which the script should be marked as timed out
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+
+
+
+ Shell interface.
+ @param env the map of environment key=value
+ @param cmd shell command to execute.
+ @return the output of the executed command.
+ @throws IOException on any problem.]]>
+
+
+
+
+ Shell processes.
+ Iterates through a map of all currently running Shell
+ processes and destroys them one by one. This method is thread safe.]]>
+
+
+
+
+ Shell objects.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ CreateProcess synchronization object.]]>
+
+
+
+
+ os.name property.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Important: caller must check for this value being null.
+ The lack of such checks has led to many support issues being raised.
+
+ @deprecated use one of the exception-raising getter methods,
+ specifically {@link #getWinUtilsPath()} or {@link #getWinUtilsFile()}]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Shell can be used to run shell commands like du or
+ df. It also offers facilities to gate commands by
+ time-intervals.]]>
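+
+ A minimal sketch (the command and its arguments are illustrative only):
+
+   import org.apache.hadoop.util.Shell;
+
+   public class ShellExample {
+     public static void main(String[] args) throws Exception {
+       // Run a simple command and capture its output.
+       String output = Shell.execCommand("ls", "-l", "/tmp");
+       System.out.println(output);
+     }
+   }
+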
+
+
+
+
+
+
+
+ ShutdownHookManager singleton.
+
+ @return ShutdownHookManager singleton.]]>
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook.]]>
+
+
+
+
+
+
+
+
+ Runnable
+ @param priority priority of the shutdownHook
+ @param timeout timeout of the shutdownHook
+ @param unit unit of the timeout TimeUnit]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ShutdownHookManager enables running shutdownHook
+ in a deterministic order, higher priority first.
+
+ The JVM runs ShutdownHooks in a non-deterministic order or in parallel.
+ This class registers a single JVM shutdownHook and runs all the
+ shutdownHooks registered to it (to this class) in order based on their
+ priority.
+
+ Unless a hook was registered with a shutdown explicitly set through
+ {@link #addShutdownHook(Runnable, int, long, TimeUnit)},
+ the shutdown time allocated to it is set by the configuration option
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT} in
+ {@code core-site.xml}, with a default value of
+ {@link CommonConfigurationKeysPublic#SERVICE_SHUTDOWN_TIMEOUT_DEFAULT}
+ seconds.]]>
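+
+ A minimal sketch of registering a hook with an explicit priority and timeout;
+ the priority, timeout and message are illustrative:
+
+   import java.util.concurrent.TimeUnit;
+   import org.apache.hadoop.util.ShutdownHookManager;
+
+   public class ShutdownHookExample {
+     public static void main(String[] args) {
+       // Higher-priority hooks run first; this hook gets a 30 second budget.
+       ShutdownHookManager.get().addShutdownHook(
+           () -> System.out.println("cleaning up"), 10, 30, TimeUnit.SECONDS);
+     }
+   }
+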
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Tool by {@link Tool#run(String[])}, after
+ parsing with the given generic arguments. Uses the given
+ Configuration, or builds one if null.
+
+ Sets the Tool's configuration with the possibly modified
+ version of the conf.
+
+ @param conf Configuration for the Tool.
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+ Tool with its Configuration.
+
+ Equivalent to run(tool.getConf(), tool, args).
+
+ @param tool Tool to run.
+ @param args command-line arguments to the tool.
+ @return exit code of the {@link Tool#run(String[])} method.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ ToolRunner can be used to run classes implementing
+ Tool interface. It works in conjunction with
+ {@link GenericOptionsParser} to parse the
+
+ generic hadoop command line arguments and modifies the
+ Configuration of the Tool. The
+ application-specific options are passed along without being modified.
+
+
+ @see Tool
+ @see GenericOptionsParser]]>
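+
+ A minimal sketch of a Tool run through ToolRunner; the class name and its
+ behaviour are invented for the example:
+
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.conf.Configured;
+   import org.apache.hadoop.util.Tool;
+   import org.apache.hadoop.util.ToolRunner;
+
+   public class EchoTool extends Configured implements Tool {
+     @Override
+     public int run(String[] args) {
+       // Generic options (-D, -conf, -fs, ...) have already been applied to getConf().
+       for (String arg : args) {
+         System.out.println(arg);
+       }
+       return 0;
+     }
+
+     public static void main(String[] args) throws Exception {
+       System.exit(ToolRunner.run(new Configuration(), new EchoTool(), args));
+     }
+   }
+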
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Bloom filter, as defined by Bloom in 1970.
+
+ The Bloom filter is a data structure that was introduced in 1970 and that has been adopted by
+ the networking research community in the past decade thanks to the bandwidth efficiencies that it
+ offers for the transmission of set membership information between networked hosts. A sender encodes
+ the information into a bit vector, the Bloom filter, that is more compact than a conventional
+ representation. Computation and space costs for construction are linear in the number of elements.
+ The receiver uses the filter to test whether various elements are members of the set. Though the
+ filter will occasionally return a false positive, it will never return a false negative. When creating
+ the filter, the sender can choose its desired point in a trade-off between the false positive rate and the size.
+
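+
+ A minimal sketch (the vector size, hash count and keys are illustrative only):
+
+   import java.nio.charset.StandardCharsets;
+   import org.apache.hadoop.util.bloom.BloomFilter;
+   import org.apache.hadoop.util.bloom.Key;
+   import org.apache.hadoop.util.hash.Hash;
+
+   public class BloomExample {
+     public static void main(String[] args) {
+       // 1024-bit vector, 3 hash functions.
+       BloomFilter filter = new BloomFilter(1024, 3, Hash.MURMUR_HASH);
+       filter.add(new Key("alice".getBytes(StandardCharsets.UTF_8)));
+
+       // May report a false positive, but never a false negative.
+       boolean maybeMember =
+           filter.membershipTest(new Key("alice".getBytes(StandardCharsets.UTF_8)));
+       System.out.println(maybeMember);
+     }
+   }
+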
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this counting Bloom filter.
+
+ Invariant: nothing happens if the specified key does not belong to this counting Bloom filter.
+ @param key The key to remove.]]>
+
+
+
+
+
+
+
+
+
+
+
+ key -> count map.
+
NOTE: due to the bucket size of this filter, inserting the same
+ key more than 15 times will cause an overflow at all filter positions
+ associated with this key, and it will significantly increase the error
+ rate for this and other keys. For this reason the filter can only be
+ used to store small count values 0 <= N << 15.
+ @param key key to be tested
+ @return 0 if the key is not present. Otherwise, a positive value v will
+ be returned such that v == count with probability equal to the
+ error rate of this filter, and v > count otherwise.
+ Additionally, if the filter experienced an underflow as a result of
+ {@link #delete(Key)} operation, the return value may be lower than the
+ count with the probability of the false negative rate of such
+ filter.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ counting Bloom filter, as defined by Fan et al. in a ToN
+ 2000 paper.
+
+ A counting Bloom filter is an improvement to a standard Bloom filter as it
+ allows dynamic additions and deletions of set membership information. This
+ is achieved through the use of a counting vector instead of a bit vector.
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Builds an empty Dynamic Bloom filter.
+ @param vectorSize The number of bits in the vector.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).
+ @param nr The threshold for the maximum number of keys to record in a
+ dynamic Bloom filter row.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ dynamic Bloom filter, as defined in the INFOCOM 2006 paper.
+
+ A dynamic Bloom filter (DBF) makes use of a s * m bit matrix but
+ each of the s rows is a standard Bloom filter. The creation
+ process of a DBF is iterative. At the start, the DBF is a 1 * m
+ bit matrix, i.e., it is composed of a single standard Bloom filter.
+ It assumes that nr elements are recorded in the
+ initial bit vector, where nr {@literal <=} n
+ (n is the cardinality of the set A to record in
+ the filter).
+
+ As the size of A grows during the execution of the application,
+ several keys must be inserted in the DBF. When inserting a key into the DBF,
+ one must first get an active Bloom filter in the matrix. A Bloom filter is
+ active when the number of recorded keys, nr, is
+ strictly less than the current cardinality of A, n.
+ If an active Bloom filter is found, the key is inserted and
+ nr is incremented by one. On the other hand, if there
+ is no active Bloom filter, a new one is created (i.e., a new row is added to
+ the matrix) according to the current size of A and the element
+ is added in this new Bloom filter and the nr value of
+ this new Bloom filter is set to one. A given key is said to belong to the
+ DBF if the k positions are set to one in one of the matrix rows.
+
+
+
+
+
+
+
+
+ Builds a hash function that must obey a given maximum number of returned values and a highest value.
+ @param maxValue The maximum highest returned value.
+ @param nbHash The number of resulting hashed values.
+ @param hashType type of the hashing function (see {@link Hash}).]]>
+
+
+
+
+ this hash function. A NOOP]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ The idea is to randomly select a bit to reset.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will generate the minimum
+ number of false negatives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will remove the maximum number
+ of false positives.]]>
+
+
+
+
+
+ The idea is to select the bit to reset that will, at the same time, remove
+ the maximum number of false positives while minimizing the number of false
+ negatives generated.]]>
+
+
+
+
+ Originally created by
+ European Commission One-Lab Project 034819.]]>
+
+
+
+
+
+
+
+
+
+
+
+
+
+ this filter.
+ @param nbHash The number of hash functions to consider.
+ @param hashType type of the hashing function (see
+ {@link org.apache.hadoop.util.hash.Hash}).]]>
+
+
+
+
+
+
+
+
+ this retouched Bloom filter.
+
+ Invariant: if the false positive is null, nothing happens.
+ @param key The false positive key to add.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param coll The collection of false positives.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The list of false positives.]]>
+
+
+
+
+
+ this retouched Bloom filter.
+ @param keys The array of false positives.]]>
+
+
+
+
+
+
+ this retouched Bloom filter.
+ @param scheme The selective clearing scheme to apply.]]>
+
+
+
+
+
+
+
+
+
+
+
+ retouched Bloom filter, as defined in the CoNEXT 2006 paper.
+
+ It allows the removal of selected false positives at the cost of introducing
+ random false negatives, and with the benefit of eliminating some random false
+ positives at the same time.
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ Any exception generated in the future is
+ extracted and rethrown.
+
+ @param future future to evaluate
+ @param type of the result.
+ @return the result, if all went well.
+ @throws InterruptedIOException future was interrupted
+ @throws IOException if something went wrong
+ @throws RuntimeException any nested RTE thrown]]>
+
+
+
+
+
+
+
+
+
+
+
+
+ Any exception generated in the future is
+ extracted and rethrown.
+
+ @param future future to evaluate
+ @param type of the result.
+ @return the result, if all went well.
+ @throws InterruptedIOException future was interrupted
+ @throws IOException if something went wrong
+ @throws RuntimeException any nested RTE thrown
+ @throws TimeoutException the future timed out.]]>
+
+
+
+
+
+
+ type of return value.
+ @return nothing, ever.
+ @throws IOException either the inner IOException, or a wrapper around
+ any non-Runtime-Exception
+ @throws RuntimeException if that is the inner cause.]]>
+
+
+
+
+
+
+ type of return value.
+ @return nothing, ever.
+ @throws IOException either the inner IOException, or a wrapper around
+ any non-Runtime-Exception
+ @throws RuntimeException if that is the inner cause.]]>
+
+
+
+
+
+
+
If it is an IOE: Return.
+
If it is a {@link UncheckedIOException}: return the cause
+
Completion/Execution Exceptions: extract and repeat
+
If it is an RTE or Error: throw.
+
Any other type: wrap in an IOE
+
+
+ Recursively handles wrapped Execution and Completion Exceptions in
+ case something very complicated has happened.
+ @param e exception.
+ @return an IOException extracted or built from the cause.
+ @throws RuntimeException if that is the inner cause.
+ @throws Error if that is the inner cause.]]>
+
+
+
+
+ Contains methods promoted from
+ {@link org.apache.hadoop.fs.impl.FutureIOSupport} because they
+ are a key part of integrating async IO in application code.
+
+
+ One key feature is that the {@link #awaitFuture(Future)} and
+ {@link #awaitFuture(Future, long, TimeUnit)} calls will
+ extract and rethrow exceptions raised in the future's execution,
+ including extracting the inner IOException of any
+ {@code UncheckedIOException} raised in the future.
+ This makes it somewhat easier to execute IOException-raising
+ code inside futures.
+
]]>
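+
+ A minimal sketch; the CompletableFuture here merely stands in for an
+ asynchronous filesystem call that might raise an IOException:
+
+   import java.io.IOException;
+   import java.util.concurrent.CompletableFuture;
+   import org.apache.hadoop.util.functional.FutureIO;
+
+   public class FutureIOExample {
+     public static void main(String[] args) throws IOException {
+       CompletableFuture<String> future =
+           CompletableFuture.supplyAsync(() -> "result");
+
+       // Blocks, unwrapping and rethrowing any IOException raised inside the future.
+       String result = FutureIO.awaitFuture(future);
+       System.out.println(result);
+     }
+   }
+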
+
+
+
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+ type
+ @return a remote iterator]]>
+
+
+
+
+
+
+ source type
+ @param result type
+ @param iterator source
+ @param mapper transformation
+ @return a remote iterator]]>
+
+
+
+
+
+ source type
+ @param result type
+ @param iterator source
+ @return a remote iterator]]>
+
+
+
+
+
+
+
+ Elements are filtered in the hasNext() method; if not used
+ the filtering will be done on demand in the {@code next()}
+ call.
+ @param type
+ @param iterator source
+ @param filter filter
+ @return a remote iterator]]>
+
+
+
+
+
+
+ source type.
+ @return a new iterator]]>
+
+
+
+
+
+
+ type
+ @return a list of the values.
+ @throws IOException if the source RemoteIterator raises it.]]>
+
+
+
+
+
+
+
+ type
+ @return an array of the values.
+ @throws IOException if the source RemoteIterator raises it.]]>
+
+
+
+
+
+
+
+
+ If the iterator is an IOStatisticsSource returning a non-null
+ set of statistics, and this classes log is set to DEBUG,
+ then the statistics of the operation are evaluated and logged at
+ debug.
+
+ The number of entries processed is returned, as it is useful to
+ know this, especially during tests or when reporting values
+ to users.
+
+ This does not close the iterator afterwards.
+ @param source iterator source
+ @param consumer consumer of the values.
+ @return the number of elements processed
+ @param type of source
+ @throws IOException if the source RemoteIterator or the consumer raise one.]]>
+
+
+
+
+
+ type of source]]>
+
+
+
+
+ This aims to make it straightforward to use lambda-expressions to
+ transform the results of an iterator, without losing the statistics
+ in the process, and to chain the operations together.
+
+ The closeable operation will be passed through RemoteIterators which
+ wrap other RemoteIterators. This is to support any iterator which
+ can be closed to release held connections, file handles etc.
+ Unless client code is written to assume that RemoteIterator instances
+ may be closed, this is not likely to be broadly used. It is added
+ to make it possible to adopt this feature in a managed way.
+
+ One notable feature is that the
+ {@link #foreach(RemoteIterator, ConsumerRaisingIOE)} method will
+ LOG at debug any IOStatistics provided by the iterator, if such
+ statistics are provided. There's no attempt at retrieval and logging
+ if the LOG is not set to debug, so it is a zero cost feature unless
+ the logger {@code org.apache.hadoop.fs.functional.RemoteIterators}
+ is at DEBUG.
+
+ Based on the S3A Listing code, and some work on moving other code
+ to using iterative listings so as to pick up the statistics.]]>
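+
+ A minimal sketch, assuming the default filesystem and an illustrative
+ directory; foreach() drives the iterator and, at DEBUG, logs any IOStatistics
+ the iterator exposes:
+
+   import org.apache.hadoop.conf.Configuration;
+   import org.apache.hadoop.fs.FileStatus;
+   import org.apache.hadoop.fs.FileSystem;
+   import org.apache.hadoop.fs.Path;
+   import org.apache.hadoop.fs.RemoteIterator;
+   import org.apache.hadoop.util.functional.RemoteIterators;
+
+   public class RemoteIteratorsExample {
+     public static void main(String[] args) throws Exception {
+       FileSystem fs = FileSystem.get(new Configuration());
+       RemoteIterator<FileStatus> listing = fs.listStatusIterator(new Path("/tmp"));
+
+       // Returns the number of entries processed.
+       long processed = RemoteIterators.foreach(listing,
+           status -> System.out.println(status.getPath()));
+       System.out.println(processed + " entries");
+     }
+   }
+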
+
+
+
+
+
+
+
+
diff --git a/hadoop-common-project/hadoop-common/pom.xml b/hadoop-common-project/hadoop-common/pom.xml
index 6db6c2f5483a2..4391995d209f8 100644
--- a/hadoop-common-project/hadoop-common/pom.xml
+++ b/hadoop-common-project/hadoop-common/pom.xml
@@ -141,12 +141,39 @@
com.sun.jerseyjersey-servletcompile
+
+
+ javax.enterprise
+ cdi-api
+
+
+ javax.servlet
+ servlet-api
+
+
+ ch.qos.cal10n
+ cal10n-api
+
+
-
- com.sun.jersey
+ com.github.pjfanningjersey-jsoncompile
+
+
+ com.fasterxml.jackson.core
+ jackson-core
+
+
+ com.fasterxml.jackson.core
+ jackson-databind
+
+
+ com.fasterxml.jackson.jaxrs
+ jackson-jaxrs-json-provider
+
+ com.sun.jersey
@@ -187,6 +214,12 @@
org.apache.commonscommons-configuration2compile
+
+
+ javax.servlet
+ servlet-api
+
+ org.apache.commons
@@ -350,6 +383,11 @@
mockwebservertest
+
+ com.squareup.okio
+ okio-jvm
+ test
+ dnsjavadnsjava
@@ -651,40 +689,6 @@
-
- org.apache.maven.plugins
- maven-enforcer-plugin
-
-
- de.skuzzle.enforcer
- restrict-imports-enforcer-rule
- ${restrict-imports.enforcer.version}
-
-
-
-
- banned-illegal-imports
- process-sources
-
- enforce
-
-
-
-
- true
- Use hadoop-common provided implementations rather than the one provided by Guava
-
- org.apache.hadoop.thirdparty.com.google.common.annotations.VisibleForTesting
- com.google.common.annotations.VisibleForTesting
- org.apache.hadoop.thirdparty.com.google.common.base.Preconditions
- com.google.common.base.Preconditions
-
-
-
-
-
-
-
@@ -842,6 +846,36 @@
+
+
+ org.apache.maven.plugins
+ maven-antrun-plugin
+ 1.8
+
+
+ validate
+
+ run
+
+
+ true
+
+
+
+
+
+
+
+
+ Skip platform toolset version detection = ${skip.platformToolsetDetection}
+
+
+
+
+ org.codehaus.mojoexec-maven-plugin
@@ -853,6 +887,7 @@
exec
+ ${skip.platformToolsetDetection}${basedir}\..\..\dev-support\bin\win-vs-upgrade.cmd${basedir}\src\main\winutils
@@ -867,6 +902,7 @@
exec
+ ${skip.platformToolsetDetection}msbuild${basedir}/src/main/winutils/winutils.sln
@@ -879,6 +915,27 @@
+
+ compile-ms-winutils-using-build-tools
+ compile
+
+ exec
+
+
+ ${skip.platformToolsetDetection.negated}
+ msbuild
+
+ ${basedir}/src/main/winutils/winutils.sln
+ /nologo
+ /p:Configuration=Release
+ /p:OutDir=${project.build.directory}/bin/
+ /p:IntermediateOutputPath=${project.build.directory}/winutils/
+ /p:WsceConfigDir=${wsce.config.dir}
+ /p:WsceConfigFile=${wsce.config.file}
+ /p:PlatformToolset=${use.platformToolsetVersion}
+
+
+ convert-ms-native-dllgenerate-sources
@@ -886,6 +943,7 @@
exec
+ ${skip.platformToolsetDetection}${basedir}\..\..\dev-support\bin\win-vs-upgrade.cmd${basedir}\src\main\native
@@ -900,6 +958,35 @@
exec
+ ${skip.platformToolsetDetection}
+ msbuild
+
+ ${basedir}/src/main/native/native.sln
+ /nologo
+ /p:Configuration=Release
+ /p:OutDir=${project.build.directory}/bin/
+ /p:CustomZstdPrefix=${zstd.prefix}
+ /p:CustomZstdLib=${zstd.lib}
+ /p:CustomZstdInclude=${zstd.include}
+ /p:RequireZstd=${require.zstd}
+ /p:CustomOpensslPrefix=${openssl.prefix}
+ /p:CustomOpensslLib=${openssl.lib}
+ /p:CustomOpensslInclude=${openssl.include}
+ /p:RequireOpenssl=${require.openssl}
+ /p:RequireIsal=${require.isal}
+ /p:CustomIsalPrefix=${isal.prefix}
+ /p:CustomIsalLib=${isal.lib}
+
+
+
+
+ compile-ms-native-dll-using-build-tools
+ compile
+
+ exec
+
+
+ ${skip.platformToolsetDetection.negated}msbuild${basedir}/src/main/native/native.sln
@@ -917,6 +1004,7 @@
/p:RequireIsal=${require.isal}/p:CustomIsalPrefix=${isal.prefix}/p:CustomIsalLib=${isal.lib}
+ /p:PlatformToolset=${use.platformToolsetVersion}
@@ -949,7 +1037,6 @@
org.apache.maven.pluginsmaven-surefire-plugin
- ${ignoreTestFailure}${testsThreadCount}false${maven-surefire-plugin.argLine} -DminiClusterDedicatedDirs=true
@@ -1153,7 +1240,7 @@
src-test-compile-protoc-legacygenerate-test-sources
- compile
+ test-compilefalse
@@ -1162,7 +1249,7 @@
com.google.protobuf:protoc:${protobuf.version}:exe:${os.detected.classifier}
false
- ${basedir}/src/test/proto
+ ${basedir}/src/test/proto${project.build.directory}/generated-test-sources/javafalse
@@ -1173,6 +1260,16 @@
+
+ org.apache.maven.plugins
+ maven-javadoc-plugin
+
+
+ **/FSProtos.java
+
+ *.proto:*.tracing:*.protobuf
+
+
diff --git a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties
index 5a2ca4d922852..54d5c729848c7 100644
--- a/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties
+++ b/hadoop-common-project/hadoop-common/src/main/conf/log4j.properties
@@ -251,30 +251,45 @@ log4j.appender.NMAUDIT.MaxBackupIndex=${nm.audit.log.maxbackupindex}
#log4j.appender.HSAUDIT.DatePattern=.yyyy-MM-dd
# Http Server Request Logs
-#log4j.logger.http.requests.namenode=INFO,namenoderequestlog
-#log4j.appender.namenoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
-#log4j.appender.namenoderequestlog.Filename=${hadoop.log.dir}/jetty-namenode-yyyy_mm_dd.log
-#log4j.appender.namenoderequestlog.RetainDays=3
-
-#log4j.logger.http.requests.datanode=INFO,datanoderequestlog
-#log4j.appender.datanoderequestlog=org.apache.hadoop.http.HttpRequestLogAppender
-#log4j.appender.datanoderequestlog.Filename=${hadoop.log.dir}/jetty-datanode-yyyy_mm_dd.log
-#log4j.appender.datanoderequestlog.RetainDays=3
-
-#log4j.logger.http.requests.resourcemanager=INFO,resourcemanagerrequestlog
-#log4j.appender.resourcemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
-#log4j.appender.resourcemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-resourcemanager-yyyy_mm_dd.log
-#log4j.appender.resourcemanagerrequestlog.RetainDays=3
-
-#log4j.logger.http.requests.jobhistory=INFO,jobhistoryrequestlog
-#log4j.appender.jobhistoryrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
-#log4j.appender.jobhistoryrequestlog.Filename=${hadoop.log.dir}/jetty-jobhistory-yyyy_mm_dd.log
-#log4j.appender.jobhistoryrequestlog.RetainDays=3
-
-#log4j.logger.http.requests.nodemanager=INFO,nodemanagerrequestlog
-#log4j.appender.nodemanagerrequestlog=org.apache.hadoop.http.HttpRequestLogAppender
-#log4j.appender.nodemanagerrequestlog.Filename=${hadoop.log.dir}/jetty-nodemanager-yyyy_mm_dd.log
-#log4j.appender.nodemanagerrequestlog.RetainDays=3
+#log4j.appender.AccessNNDRFA=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.AccessNNDRFA.File=${hadoop.log.dir}/jetty-namenode.log
+#log4j.appender.AccessNNDRFA.DatePattern=.yyyy-MM-dd
+#log4j.appender.AccessNNDRFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.AccessNNDRFA.layout.ConversionPattern=%m%n
+
+#log4j.logger.http.requests.namenode=INFO,AccessNNDRFA
+
+#log4j.appender.AccessDNDRFA=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.AccessDNDRFA.File=${hadoop.log.dir}/jetty-datanode.log
+#log4j.appender.AccessDNDRFA.DatePattern=.yyyy-MM-dd
+#log4j.appender.AccessDNDRFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.AccessDNDRFA.layout.ConversionPattern=%m%n
+
+#log4j.logger.http.requests.datanode=INFO,AccessDNDRFA
+
+#log4j.appender.AccessRMDRFA=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.AccessRMDRFA.File=${hadoop.log.dir}/jetty-resourcemanager.log
+#log4j.appender.AccessRMDRFA.DatePattern=.yyyy-MM-dd
+#log4j.appender.AccessRMDRFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.AccessRMDRFA.layout.ConversionPattern=%m%n
+
+#log4j.logger.http.requests.resourcemanager=INFO,AccessRMDRFA
+
+#log4j.appender.AccessJHDRFA=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.AccessJHDRFA.File=${hadoop.log.dir}/jetty-jobhistory.log
+#log4j.appender.AccessJHDRFA.DatePattern=.yyyy-MM-dd
+#log4j.appender.AccessJHDRFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.AccessJHDRFA.layout.ConversionPattern=%m%n
+
+#log4j.logger.http.requests.jobhistory=INFO,AccessJHDRFA
+
+#log4j.appender.AccessNMDRFA=org.apache.log4j.DailyRollingFileAppender
+#log4j.appender.AccessNMDRFA.File=${hadoop.log.dir}/jetty-nodemanager.log
+#log4j.appender.AccessNMDRFA.DatePattern=.yyyy-MM-dd
+#log4j.appender.AccessNMDRFA.layout=org.apache.log4j.PatternLayout
+#log4j.appender.AccessNMDRFA.layout.ConversionPattern=%m%n
+
+#log4j.logger.http.requests.nodemanager=INFO,AccessNMDRFA
# WebHdfs request log on datanodes
# Specify -Ddatanode.webhdfs.logger=INFO,HTTPDRFA on datanode startup to
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfServlet.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfServlet.java
index 67d0e6301320a..b427038fdddc9 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfServlet.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfServlet.java
@@ -98,7 +98,7 @@ static void writeResponse(Configuration conf,
if (FORMAT_JSON.equals(format)) {
Configuration.dumpConfiguration(conf, propertyName, out);
} else if (FORMAT_XML.equals(format)) {
- conf.writeXml(propertyName, out);
+ conf.writeXml(propertyName, out, conf);
} else {
throw new BadFormatException("Bad format: " + format);
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfigRedactor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfigRedactor.java
index 5b2d1449f9c86..1e74077c13977 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfigRedactor.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ConfigRedactor.java
@@ -37,6 +37,7 @@
public class ConfigRedactor {
private static final String REDACTED_TEXT = "";
+ private static final String REDACTED_XML = "******";
private List compiledPatterns;
@@ -57,8 +58,8 @@ public ConfigRedactor(Configuration conf) {
* Given a key / value pair, decides whether or not to redact and returns
* either the original value or text indicating it has been redacted.
*
- * @param key
- * @param value
+ * @param key param key.
+ * @param value param value; returned unchanged unless it must be redacted.
* @return Original value, or text indicating it has been redacted
*/
public String redact(String key, String value) {
@@ -84,4 +85,19 @@ private boolean configIsSensitive(String key) {
}
return false;
}
+
+ /**
+ * Given a key / value pair, decides whether or not to redact and returns
+ * either the original value or text indicating it has been redacted.
+ *
+ * @param key param key.
+ * @param value param value, will return if conditions permit.
+ * @param value param value; returned unchanged unless it must be redacted.
+ */
+ public String redactXml(String key, String value) {
+ if (configIsSensitive(key)) {
+ return REDACTED_XML;
+ }
+ return value;
+ }
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
index 7ceaad37b1dcd..ab7ff0bd40cc2 100755
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configuration.java
@@ -24,7 +24,6 @@
import com.ctc.wstx.stax.WstxInputFactory;
import com.fasterxml.jackson.core.JsonFactory;
import com.fasterxml.jackson.core.JsonGenerator;
-import org.apache.hadoop.classification.VisibleForTesting;
import java.io.BufferedInputStream;
import java.io.DataInput;
@@ -87,6 +86,7 @@
import org.apache.commons.collections.map.UnmodifiableMap;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.classification.VisibleForTesting;
import org.apache.hadoop.fs.CommonConfigurationKeysPublic;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
@@ -98,18 +98,19 @@
import org.apache.hadoop.security.alias.CredentialProvider;
import org.apache.hadoop.security.alias.CredentialProvider.CredentialEntry;
import org.apache.hadoop.security.alias.CredentialProviderFactory;
+import org.apache.hadoop.thirdparty.com.google.common.base.Strings;
+import org.apache.hadoop.util.Preconditions;
import org.apache.hadoop.util.ReflectionUtils;
import org.apache.hadoop.util.StringInterner;
import org.apache.hadoop.util.StringUtils;
+import org.apache.hadoop.util.XMLUtils;
+
import org.codehaus.stax2.XMLStreamReader2;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import org.w3c.dom.Document;
import org.w3c.dom.Element;
-import org.apache.hadoop.util.Preconditions;
-import org.apache.hadoop.thirdparty.com.google.common.base.Strings;
-
import static org.apache.commons.lang3.StringUtils.isBlank;
import static org.apache.commons.lang3.StringUtils.isNotBlank;
@@ -317,7 +318,7 @@ private static boolean getRestrictParserDefault(Object resource) {
private boolean loadDefaults = true;
/**
- * Configuration objects
+ * Configuration objects.
*/
private static final WeakHashMap REGISTRY =
new WeakHashMap();
@@ -774,7 +775,7 @@ private void updatePropertiesWithDeprecatedKeys(
private void handleDeprecation() {
LOG.debug("Handling deprecation for all properties in config...");
DeprecationContext deprecations = deprecationContext.get();
- Set keys = new HashSet();
+ Set keys = new HashSet<>();
keys.addAll(getProps().keySet());
for (Object item: keys) {
LOG.debug("Handling deprecation for " + (String)item);
@@ -1099,6 +1100,20 @@ private static int[] findSubVariable(String eval) {
return result;
}
+ /**
+ * Provides a public wrapper over substituteVars in order to avoid compatibility issues.
+ * See HADOOP-18021 for further details.
+ *
+ * @param expr the literal value of a config key
+ * @return null if expr is null, otherwise the value resulting from expanding
+ * expr using the algorithm above.
+ * @throws IllegalArgumentException when more than
+ * {@link Configuration#MAX_SUBST} replacements are required
+ */
+ public String substituteCommonVariables(String expr) {
+ return substituteVars(expr);
+ }
+
/**
* Attempts to repeatedly expand the value {@code expr} by replacing the
* left-most substring of the form "${var}" in the following precedence order
@@ -1120,13 +1135,17 @@ private static int[] findSubVariable(String eval) {
* If a cycle is detected then the original expr is returned. Loops
* involving multiple substitutions are not detected.
*
+ * In order not to introduce breaking changes (as Oozie for example contains a method with the
+ * same name and same signature) do not make this method public, use substituteCommonVariables
+ * in this case.
+ *
* @param expr the literal value of a config key
* @return null if expr is null, otherwise the value resulting from expanding
* expr using the algorithm above.
* @throws IllegalArgumentException when more than
* {@link Configuration#MAX_SUBST} replacements are required
*/
- public String substituteVars(String expr) {
+ private String substituteVars(String expr) {
if (expr == null) {
return null;
}
@@ -1890,6 +1909,7 @@ public long getTimeDuration(String name, String defaultValue,
* @param name Property name
* @param vStr The string value with time unit suffix to be converted.
* @param unit Unit to convert the stored property, if it exists.
+ * @return time duration in given time unit.
*/
public long getTimeDurationHelper(String name, String vStr, TimeUnit unit) {
return getTimeDurationHelper(name, vStr, unit, unit);
@@ -1904,6 +1924,7 @@ public long getTimeDurationHelper(String name, String vStr, TimeUnit unit) {
* @param vStr The string value with time unit suffix to be converted.
* @param defaultUnit Unit to convert the stored property, if it exists.
* @param returnUnit Unit for the returned value.
+ * @return time duration in given time unit.
*/
private long getTimeDurationHelper(String name, String vStr,
TimeUnit defaultUnit, TimeUnit returnUnit) {
@@ -2188,7 +2209,7 @@ private static int convertToInt(String value, int defaultValue) {
}
/**
- * Is the given value in the set of ranges
+ * Is the given value in the set of ranges.
* @param value the value to check
* @return is the value in the ranges?
*/
@@ -2245,7 +2266,7 @@ public Iterator iterator() {
}
/**
- * Parse the given attribute as a set of integer ranges
+ * Parse the given attribute as a set of integer ranges.
* @param name the attribute name
* @param defaultValue the default value if it is not set
* @return a new set of ranges from the configured value
@@ -2464,7 +2485,7 @@ public char[] getPasswordFromCredentialProviders(String name)
/**
* Fallback to clear text passwords in configuration.
- * @param name
+ * @param name the property name.
* @return clear text password or null
*/
protected char[] getPasswordFromConfig(String name) {
@@ -2529,6 +2550,8 @@ public InetSocketAddress getSocketAddr(
/**
* Set the socket address for the name property as
* a host:port.
+ * @param name property name.
+ * @param addr inetSocketAddress addr.
*/
public void setSocketAddr(String name, InetSocketAddress addr) {
set(name, NetUtils.getHostPortString(addr));
@@ -2706,6 +2729,7 @@ public Class> getClass(String name, Class> defaultValue) {
* @param name the conf key name.
* @param defaultValue default value.
* @param xface the interface implemented by the named class.
+ * @param Interface class type.
* @return property value as a Class,
* or defaultValue.
*/
@@ -2735,6 +2759,7 @@ else if (theClass != null)
* @param name the property name.
* @param xface the interface implemented by the classes named by
* name.
+ * @param Interface class type.
* @return a List of objects implementing xface.
*/
@SuppressWarnings("unchecked")
@@ -2767,15 +2792,16 @@ public void setClass(String name, Class> theClass, Class> xface) {
set(name, theClass.getName());
}
- /**
+ /**
* Get a local file under a directory named by dirsProp with
* the given path. If dirsProp contains multiple directories,
* then one is chosen based on path's hash code. If the selected
* directory does not exist, an attempt is made to create it.
- *
+ *
* @param dirsProp directory in which to locate the file.
* @param path file-path.
* @return local file under the directory with the given path.
+ * @throws IOException raised on errors performing I/O.
*/
public Path getLocalPath(String dirsProp, String path)
throws IOException {
@@ -2799,15 +2825,16 @@ public Path getLocalPath(String dirsProp, String path)
throw new IOException("No valid local directories in property: "+dirsProp);
}
- /**
+ /**
* Get a local file name under a directory named in dirsProp with
* the given path. If dirsProp contains multiple directories,
* then one is chosen based on path's hash code. If the selected
* directory does not exist, an attempt is made to create it.
- *
+ *
* @param dirsProp directory in which to locate the file.
* @param path file-path.
* @return local file under the directory with the given path.
+ * @throws IOException raised on errors performing I/O.
*/
public File getFile(String dirsProp, String path)
throws IOException {
@@ -2960,11 +2987,13 @@ public Iterator> iterator() {
// methods that allow non-strings to be put into configurations are removed,
// we could replace properties with a Map and get rid of this
// code.
- Map<String,String> result = new HashMap<String,String>();
- for(Map.Entry<Object,Object> item: getProps().entrySet()) {
- if (item.getKey() instanceof String &&
- item.getValue() instanceof String) {
+ Properties props = getProps();
+ Map<String, String> result = new HashMap<>();
+ synchronized (props) {
+ for (Map.Entry<Object, Object> item : props.entrySet()) {
+ if (item.getKey() instanceof String && item.getValue() instanceof String) {
result.put((String) item.getKey(), (String) item.getValue());
+ }
}
}
return result.entrySet().iterator();
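
The snapshot above is taken while holding the Properties monitor because Properties
extends Hashtable, whose iteration fails if another thread mutates it concurrently.
A hedged sketch of what this protects from the caller's side:

    Configuration conf = new Configuration();
    // Safe even if another thread calls conf.set(...) at the same time, since
    // iterator() copies the entries under the Properties lock first.
    for (Map.Entry<String, String> entry : conf) {
      System.out.println(entry.getKey() + "=" + entry.getValue());
    }
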
@@ -3417,7 +3446,7 @@ void parseNext() throws IOException, XMLStreamException {
/**
* Add tags defined in HADOOP_TAGS_SYSTEM, HADOOP_TAGS_CUSTOM.
- * @param prop
+ * @param prop properties.
*/
public void addTags(Properties prop) {
// Get all system tags
@@ -3518,7 +3547,7 @@ private void loadProperty(Properties properties, String name, String attr,
/**
* Print a warning if a property with a given name already exists with a
- * different value
+ * different value.
*/
private void checkForOverride(Properties properties, String name, String attr, String value) {
String propertyValue = properties.getProperty(attr);
@@ -3528,11 +3557,12 @@ private void checkForOverride(Properties properties, String name, String attr, S
}
}
- /**
+ /**
* Write out the non-default properties in this configuration to the given
* {@link OutputStream} using UTF-8 encoding.
- *
+ *
* @param out the output stream to write to.
+ * @throws IOException raised on errors performing I/O.
*/
public void writeXml(OutputStream out) throws IOException {
writeXml(new OutputStreamWriter(out, "UTF-8"));
@@ -3562,16 +3592,20 @@ public void writeXml(Writer out) throws IOException {
* the configuration, this method throws an {@link IllegalArgumentException}.
*
*
+ * @param propertyName xml property name.
* @param out the writer to write to.
+ * @param config if non-null, sensitive values are redacted in the output.
+ * @throws IOException raised on errors performing I/O.
*/
- public void writeXml(@Nullable String propertyName, Writer out)
+ public void writeXml(@Nullable String propertyName, Writer out, Configuration config)
throws IOException, IllegalArgumentException {
- Document doc = asXmlDocument(propertyName);
+ ConfigRedactor redactor = config != null ? new ConfigRedactor(this) : null;
+ Document doc = asXmlDocument(propertyName, redactor);
try {
DOMSource source = new DOMSource(doc);
StreamResult result = new StreamResult(out);
- TransformerFactory transFactory = TransformerFactory.newInstance();
+ TransformerFactory transFactory = XMLUtils.newSecureTransformerFactory();
Transformer transformer = transFactory.newTransformer();
// Important to not hold Configuration log while writing result, since
@@ -3583,11 +3617,16 @@ public void writeXml(@Nullable String propertyName, Writer out)
}
}
+ public void writeXml(@Nullable String propertyName, Writer out)
+ throws IOException, IllegalArgumentException {
+ writeXml(propertyName, out, null);
+ }
+
/**
* Return the XML DOM corresponding to this Configuration.
*/
- private synchronized Document asXmlDocument(@Nullable String propertyName)
- throws IOException, IllegalArgumentException {
+ private synchronized Document asXmlDocument(@Nullable String propertyName,
+ ConfigRedactor redactor) throws IOException, IllegalArgumentException {
Document doc;
try {
doc = DocumentBuilderFactory
@@ -3610,13 +3649,13 @@ private synchronized Document asXmlDocument(@Nullable String propertyName)
propertyName + " not found");
} else {
// given property is found, write single property
- appendXMLProperty(doc, conf, propertyName);
+ appendXMLProperty(doc, conf, propertyName, redactor);
conf.appendChild(doc.createTextNode("\n"));
}
} else {
// append all elements
for (Enumeration<Object> e = properties.keys(); e.hasMoreElements();) {
- appendXMLProperty(doc, conf, (String)e.nextElement());
+ appendXMLProperty(doc, conf, (String)e.nextElement(), redactor);
conf.appendChild(doc.createTextNode("\n"));
}
}
@@ -3632,7 +3671,7 @@ private synchronized Document asXmlDocument(@Nullable String propertyName)
* @param propertyName
*/
private synchronized void appendXMLProperty(Document doc, Element conf,
- String propertyName) {
+ String propertyName, ConfigRedactor redactor) {
// skip writing if given property name is empty or null
if (!Strings.isNullOrEmpty(propertyName)) {
String value = properties.getProperty(propertyName);
@@ -3645,8 +3684,11 @@ private synchronized void appendXMLProperty(Document doc, Element conf,
propNode.appendChild(nameNode);
Element valueNode = doc.createElement("value");
- valueNode.appendChild(doc.createTextNode(
- properties.getProperty(propertyName)));
+ String propertyValue = properties.getProperty(propertyName);
+ if (redactor != null) {
+ propertyValue = redactor.redactXml(propertyName, propertyValue);
+ }
+ valueNode.appendChild(doc.createTextNode(propertyValue));
propNode.appendChild(valueNode);
Element finalNode = doc.createElement("final");
@@ -3716,7 +3758,7 @@ private synchronized void appendXMLProperty(Document doc, Element conf,
* @param config the configuration
* @param propertyName property name
* @param out the Writer to write to
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
* @throws IllegalArgumentException when property name is not
* empty and the property is not found in configuration
**/
@@ -3763,7 +3805,7 @@ public static void dumpConfiguration(Configuration config,
*
* @param config the configuration
* @param out the Writer to write to
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public static void dumpConfiguration(Configuration config,
Writer out) throws IOException {
@@ -3792,7 +3834,7 @@ public static void dumpConfiguration(Configuration config,
* @param jsonGen json writer
* @param config configuration
* @param name property name
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
private static void appendJSONProperty(JsonGenerator jsonGen,
Configuration config, String name, ConfigRedactor redactor)
@@ -3874,7 +3916,10 @@ synchronized boolean getQuietMode() {
return this.quietmode;
}
- /** For debugging. List non-default properties to the terminal and exit. */
+ /** For debugging. List non-default properties to the terminal and exit.
+ * @param args the command-line arguments.
+ * @throws Exception if an error occurs.
+ */
public static void main(String[] args) throws Exception {
new Configuration().writeXml(System.out);
}
@@ -3908,8 +3953,8 @@ public void write(DataOutput out) throws IOException {
}
/**
- * get keys matching the the regex
- * @param regex
+ * get keys matching the regex.
+ * @param regex the regex to match against.
* @return {@literal Map<String, String>} with matching keys
*/
public Map getValByRegex(String regex) {
@@ -3954,6 +3999,8 @@ public static void dumpDeprecatedKeys() {
/**
* Returns whether or not a deprecated name has been warned. If the name is not
* deprecated then always return false
+ * @param name the property name.
+ * @return true if name is a warned deprecation.
*/
public static boolean hasWarnedDeprecation(String name) {
DeprecationContext deprecations = deprecationContext.get();
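
A hedged usage sketch of the new three-argument writeXml overload introduced above:
passing a non-null Configuration enables the ConfigRedactor path, so values of keys
matching hadoop.security.sensitive-config-keys are masked in the emitted XML.

    Configuration conf = new Configuration();
    try (Writer out = new OutputStreamWriter(System.out, StandardCharsets.UTF_8)) {
      // A null propertyName dumps every property; the Configuration argument
      // turns on redaction of sensitive values.
      conf.writeXml(null, out, conf);
    }
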
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configured.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configured.java
index f06af2b98df14..77a7117d19665 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configured.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Configured.java
@@ -33,7 +33,9 @@ public Configured() {
this(null);
}
- /** Construct a Configured. */
+ /** Construct a Configured.
+ * @param conf the Configuration object.
+ */
public Configured(Configuration conf) {
setConf(conf);
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Reconfigurable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Reconfigurable.java
index c93dc31a881a9..915faf4c237ad 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Reconfigurable.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/Reconfigurable.java
@@ -33,6 +33,9 @@ public interface Reconfigurable extends Configurable {
* (or null if it was not previously set). If newVal is null, set the property
* to its default value;
*
+ * @param property property name.
+ * @param newVal new value.
+ * @throws ReconfigurationException if there was an error applying newVal.
* If the property cannot be changed, throw a
* {@link ReconfigurationException}.
*/
@@ -45,11 +48,14 @@ void reconfigureProperty(String property, String newVal)
* If isPropertyReconfigurable returns true for a property,
* then changeConf should not throw an exception when changing
* this property.
+ * @param property property name.
+ * @return true if property reconfigurable; false if not.
*/
boolean isPropertyReconfigurable(String property);
/**
* Return all the properties that can be changed at run time.
+ * @return the reconfigurable properties.
*/
Collection getReconfigurableProperties();
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurableBase.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurableBase.java
index 35dfeb99f0ba6..1c451ca6d30b9 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurableBase.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurableBase.java
@@ -79,6 +79,7 @@ public ReconfigurableBase() {
/**
* Construct a ReconfigurableBase with the {@link Configuration}
* conf.
+ * @param conf configuration.
*/
public ReconfigurableBase(Configuration conf) {
super((conf == null) ? new Configuration() : conf);
@@ -91,6 +92,7 @@ public void setReconfigurationUtil(ReconfigurationUtil ru) {
/**
* Create a new configuration.
+ * @return configuration.
*/
protected abstract Configuration getNewConf();
@@ -162,6 +164,7 @@ public void run() {
/**
* Start a reconfiguration task to reload configuration in background.
+ * @throws IOException raised on errors performing I/O.
*/
public void startReconfigurationTask() throws IOException {
synchronized (reconfigLock) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationException.java
index 0935bf025fd30..b22af76c9eb6b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationException.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationException.java
@@ -59,6 +59,10 @@ public ReconfigurationException() {
/**
* Create a new instance of {@link ReconfigurationException}.
+ * @param property property name.
+ * @param newVal new value.
+ * @param oldVal old value.
+ * @param cause original exception.
*/
public ReconfigurationException(String property,
String newVal, String oldVal,
@@ -71,6 +75,9 @@ public ReconfigurationException(String property,
/**
* Create a new instance of {@link ReconfigurationException}.
+ * @param property property name.
+ * @param newVal new value.
+ * @param oldVal old value.
*/
public ReconfigurationException(String property,
String newVal, String oldVal) {
@@ -82,6 +89,7 @@ public ReconfigurationException(String property,
/**
* Get property that cannot be changed.
+ * @return the property name.
*/
public String getProperty() {
return property;
@@ -89,6 +97,7 @@ public String getProperty() {
/**
* Get value to which property was supposed to be changed.
+ * @return new value.
*/
public String getNewValue() {
return newVal;
@@ -96,6 +105,7 @@ public String getNewValue() {
/**
* Get old value of property that cannot be changed.
+ * @return old value.
*/
public String getOldValue() {
return oldVal;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationTaskStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationTaskStatus.java
index 05ec90758e5fa..ca9ddb61566ef 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationTaskStatus.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/conf/ReconfigurationTaskStatus.java
@@ -42,7 +42,8 @@ public ReconfigurationTaskStatus(long startTime, long endTime,
/**
* Return true if
* - A reconfiguration task has finished or
- * - an active reconfiguration task is running
+ * - an active reconfiguration task is running.
+ * @return true if startTime > 0; false if not.
*/
public boolean hasTask() {
return startTime > 0;
@@ -51,6 +52,7 @@ public boolean hasTask() {
/**
* Return true if the latest reconfiguration task has finished and there is
* no another active task running.
+ * @return true if endTime > 0; false if not.
*/
public boolean stopped() {
return endTime > 0;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoCodec.java
index 64c754faa59d8..e6813b96a2670 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoCodec.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoCodec.java
@@ -145,14 +145,18 @@ private static List> getCodecClasses(
public abstract CipherSuite getCipherSuite();
/**
- * Create a {@link org.apache.hadoop.crypto.Encryptor}.
- * @return Encryptor the encryptor
+ * Create a {@link org.apache.hadoop.crypto.Encryptor}.
+ *
+ * @return Encryptor the encryptor.
+ * @throws GeneralSecurityException thrown if the encryptor cannot be created.
*/
public abstract Encryptor createEncryptor() throws GeneralSecurityException;
-
+
/**
* Create a {@link org.apache.hadoop.crypto.Decryptor}.
+ *
* @return Decryptor the decryptor
+ * @throws GeneralSecurityException thrown if the decryptor cannot be created.
*/
public abstract Decryptor createDecryptor() throws GeneralSecurityException;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java
index 5ab5d341fb826..067abde9dfbb8 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoInputStream.java
@@ -157,7 +157,7 @@ public InputStream getWrappedStream() {
* @param off the buffer offset.
* @param len the maximum number of decrypted data bytes to read.
* @return int the total number of decrypted data bytes read into the buffer.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
@Override
public int read(byte[] b, int off, int len) throws IOException {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoOutputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoOutputStream.java
index 8e7522112551e..2a1335b6e745a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoOutputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoOutputStream.java
@@ -146,7 +146,7 @@ public OutputStream getWrappedStream() {
* @param b the data.
* @param off the start offset in the data.
* @param len the number of bytes to write.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
@Override
public synchronized void write(byte[] b, int off, int len) throws IOException {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoStreamUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoStreamUtils.java
index 318975fd6cebd..dad4d20df2afd 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoStreamUtils.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/CryptoStreamUtils.java
@@ -39,7 +39,11 @@ public class CryptoStreamUtils {
private static final Logger LOG =
LoggerFactory.getLogger(CryptoStreamUtils.class);
- /** Forcibly free the direct buffer. */
+ /**
+ * Forcibly free the direct buffer.
+ *
+ * @param buffer the direct buffer to free.
+ */
public static void freeDB(ByteBuffer buffer) {
if (CleanerUtil.UNMAP_SUPPORTED) {
try {
@@ -52,13 +56,22 @@ public static void freeDB(ByteBuffer buffer) {
}
}
- /** Read crypto buffer size */
+ /**
+ * Read crypto buffer size.
+ *
+ * @param conf configuration.
+ * @return the configured hadoop.security.crypto.buffer.size value.
+ */
public static int getBufferSize(Configuration conf) {
return conf.getInt(HADOOP_SECURITY_CRYPTO_BUFFER_SIZE_KEY,
HADOOP_SECURITY_CRYPTO_BUFFER_SIZE_DEFAULT);
}
-
- /** AES/CTR/NoPadding or SM4/CTR/NoPadding is required. */
+
+ /**
+ * AES/CTR/NoPadding or SM4/CTR/NoPadding is required.
+ *
+ * @param codec crypto codec.
+ */
public static void checkCodec(CryptoCodec codec) {
if (codec.getCipherSuite() != CipherSuite.AES_CTR_NOPADDING &&
codec.getCipherSuite() != CipherSuite.SM4_CTR_NOPADDING) {
@@ -67,17 +80,27 @@ public static void checkCodec(CryptoCodec codec) {
}
}
- /** Check and floor buffer size */
+ /**
+ * Check and floor buffer size.
+ *
+ * @param codec crypto codec.
+ * @param bufferSize the size of the buffer to be used.
+ * @return the buffer size floored to a multiple of the algorithm block size.
+ */
public static int checkBufferSize(CryptoCodec codec, int bufferSize) {
Preconditions.checkArgument(bufferSize >= MIN_BUFFER_SIZE,
"Minimum value of buffer size is " + MIN_BUFFER_SIZE + ".");
return bufferSize - bufferSize % codec.getCipherSuite()
.getAlgorithmBlockSize();
}
-
+
/**
* If input stream is {@link org.apache.hadoop.fs.Seekable}, return it's
* current position, otherwise return 0;
+ *
+ * @param in the input stream.
+ * @return current position, otherwise return 0.
+ * @throws IOException raised on errors performing I/O.
*/
public static long getInputStreamOffset(InputStream in) throws IOException {
if (in instanceof Seekable) {
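
A worked example of the checkBufferSize() flooring described above, assuming an
AES/CTR/NoPadding suite with a 16-byte algorithm block size:

    int blockSize = 16;                               // cipher suite block size
    int requested = 8 * 1024 + 5;                     // 8197 bytes from configuration
    int floored = requested - requested % blockSize;  // 8192 bytes actually used
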
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslCipher.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslCipher.java
index d22e91442cca4..b166cfc8611b3 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslCipher.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/OpensslCipher.java
@@ -84,14 +84,14 @@ static int get(String padding) throws NoSuchPaddingException {
String loadingFailure = null;
try {
if (!NativeCodeLoader.buildSupportsOpenssl()) {
- PerformanceAdvisory.LOG.debug("Build does not support openssl");
+ PerformanceAdvisory.LOG.warn("Build does not support openssl");
loadingFailure = "build does not support openssl.";
} else {
initIDs();
}
} catch (Throwable t) {
loadingFailure = t.getMessage();
- LOG.debug("Failed to load OpenSSL Cipher.", t);
+ LOG.warn("Failed to load OpenSSL Cipher.", t);
} finally {
loadingFailureReason = loadingFailure;
}
@@ -225,34 +225,33 @@ public int update(ByteBuffer input, ByteBuffer output)
output.position(output.position() + len);
return len;
}
-
+
/**
* Finishes a multiple-part operation. The data is encrypted or decrypted,
* depending on how this cipher was initialized.
*
- *
* The result is stored in the output buffer. Upon return, the output buffer's
* position will have advanced by n, where n is the value returned by this
* method; the output buffer's limit will not have changed.
- *
- *
+ *
* If output.remaining() bytes are insufficient to hold the result,
* a ShortBufferException is thrown.
*
- *
* Upon finishing, this method resets this cipher object to the state it was
* in when previously initialized. That is, the object is available to encrypt
* or decrypt more data.
- *
- *
- * If any exception is thrown, this cipher object need to be reset before it
+ *
+ * If any exception is thrown, this cipher object needs to be reset before it
* can be used again.
- *
+ *
* @param output the output ByteBuffer
* @return int number of bytes stored in output
- * @throws ShortBufferException
- * @throws IllegalBlockSizeException
- * @throws BadPaddingException
+ * @throws ShortBufferException if there is insufficient space in the output buffer.
+ * @throws IllegalBlockSizeException This exception is thrown when the length
+ * of data provided to a block cipher is incorrect.
+ * @throws BadPaddingException This exception is thrown when a particular
+ * padding mechanism is expected for the input
+ * data but the data is not padded properly.
*/
public int doFinal(ByteBuffer output) throws ShortBufferException,
IllegalBlockSizeException, BadPaddingException {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java
index dafdaf7e15b25..5e207251805fc 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProvider.java
@@ -242,7 +242,7 @@ protected int addVersion() {
/**
* Serialize the metadata to a set of bytes.
* @return the serialized bytes
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
protected byte[] serialize() throws IOException {
ByteArrayOutputStream buffer = new ByteArrayOutputStream();
@@ -281,7 +281,7 @@ protected byte[] serialize() throws IOException {
/**
* Deserialize a new metadata object from a set of bytes.
* @param bytes the serialized metadata
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
protected Metadata(byte[] bytes) throws IOException {
String cipher = null;
@@ -450,7 +450,7 @@ public boolean isTransient() {
* when decrypting data.
* @param versionName the name of a specific version of the key
* @return the key material
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public abstract KeyVersion getKeyVersion(String versionName
) throws IOException;
@@ -458,14 +458,15 @@ public abstract KeyVersion getKeyVersion(String versionName
/**
* Get the key names for all keys.
* @return the list of key names
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public abstract List<String> getKeys() throws IOException;
/**
* Get key metadata in bulk.
* @param names the names of the keys to get
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
+ * @return the array of metadata for the requested keys.
*/
public Metadata[] getKeysMetadata(String... names) throws IOException {
Metadata[] result = new Metadata[names.length];
@@ -477,8 +478,10 @@ public Metadata[] getKeysMetadata(String... names) throws IOException {
/**
* Get the key material for all versions of a specific key name.
+ *
+ * @param name the base name of the key.
* @return the list of key material
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public abstract List<KeyVersion> getKeyVersions(String name) throws IOException;
@@ -488,7 +491,7 @@ public Metadata[] getKeysMetadata(String... names) throws IOException {
* @param name the base name of the key
* @return the version name of the current version of the key or null if the
* key version doesn't exist
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public KeyVersion getCurrentKey(String name) throws IOException {
Metadata meta = getMetadata(name);
@@ -502,7 +505,7 @@ public KeyVersion getCurrentKey(String name) throws IOException {
* Get metadata about the key.
* @param name the basename of the key
* @return the key's metadata or null if the key doesn't exist
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public abstract Metadata getMetadata(String name) throws IOException;
@@ -512,7 +515,7 @@ public KeyVersion getCurrentKey(String name) throws IOException {
* @param material the key material for the first version of the key.
* @param options the options for the new key.
* @return the version name of the first version of the key.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public abstract KeyVersion createKey(String name, byte[] material,
Options options) throws IOException;
@@ -537,7 +540,7 @@ private String getAlgorithm(String cipher) {
* @param size length of the key.
* @param algorithm algorithm to use for generating the key.
* @return the generated key.
- * @throws NoSuchAlgorithmException
+ * @throws NoSuchAlgorithmException no such algorithm exception.
*/
protected byte[] generateKey(int size, String algorithm)
throws NoSuchAlgorithmException {
@@ -558,8 +561,8 @@ protected byte[] generateKey(int size, String algorithm)
* @param name the base name of the key
* @param options the options for the new key.
* @return the version name of the first version of the key.
- * @throws IOException
- * @throws NoSuchAlgorithmException
+ * @throws IOException raised on errors performing I/O.
+ * @throws NoSuchAlgorithmException no such algorithm exception.
*/
public KeyVersion createKey(String name, Options options)
throws NoSuchAlgorithmException, IOException {
@@ -570,7 +573,7 @@ public KeyVersion createKey(String name, Options options)
/**
* Delete the given key.
* @param name the name of the key to delete
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public abstract void deleteKey(String name) throws IOException;
@@ -579,7 +582,7 @@ public KeyVersion createKey(String name, Options options)
* @param name the basename of the key
* @param material the new key material
* @return the name of the new version of the key
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public abstract KeyVersion rollNewVersion(String name,
byte[] material
@@ -601,7 +604,10 @@ public void close() throws IOException {
*
* @param name the basename of the key
* @return the name of the new version of the key
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
+ * @throws NoSuchAlgorithmException This exception is thrown when a particular
+ * cryptographic algorithm is requested
+ * but is not available in the environment.
*/
public KeyVersion rollNewVersion(String name) throws NoSuchAlgorithmException,
IOException {
@@ -620,7 +626,7 @@ public KeyVersion rollNewVersion(String name) throws NoSuchAlgorithmException,
* version of the given key.
*
* @param name the basename of the key
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public void invalidateCache(String name) throws IOException {
// NOP
@@ -628,18 +634,19 @@ public void invalidateCache(String name) throws IOException {
/**
* Ensures that any changes to the keys are written to persistent store.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public abstract void flush() throws IOException;
/**
- * Split the versionName in to a base name. Converts "/aaa/bbb/3" to
+ * Split the versionName into a base name. Converts "/aaa/bbb@3" to
* "/aaa/bbb".
* @param versionName the version name to split
* @return the base name of the key
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public static String getBaseName(String versionName) throws IOException {
+ Objects.requireNonNull(versionName, "VersionName cannot be null");
int div = versionName.lastIndexOf('@');
if (div == -1) {
throw new IOException("No version in key path " + versionName);
@@ -660,9 +667,11 @@ protected static String buildVersionName(String name, int version) {
/**
* Find the provider with the given key.
+ *
* @param providerList the list of providers
- * @param keyName the key name we are looking for
+ * @param keyName the key name we are looking for.
* @return the KeyProvider that has the key
+ * @throws IOException raised on errors performing I/O.
*/
public static KeyProvider findProvider(List<KeyProvider> providerList,
String keyName) throws IOException {
@@ -680,7 +689,7 @@ public static KeyProvider findProvider(List providerList,
* means. If true, the password should be provided by the caller using
* setPassword().
* @return Whether or not the provider requires a password
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public boolean needsPassword() throws IOException {
return false;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderCryptoExtension.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderCryptoExtension.java
index 3f3c367fc3933..f1bb314582038 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderCryptoExtension.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderCryptoExtension.java
@@ -25,10 +25,6 @@
import java.util.List;
import java.util.ListIterator;
-import javax.crypto.Cipher;
-import javax.crypto.spec.IvParameterSpec;
-import javax.crypto.spec.SecretKeySpec;
-
import org.apache.hadoop.util.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.crypto.CryptoCodec;
@@ -178,6 +174,7 @@ public interface CryptoExtension extends KeyProviderExtension.Extension {
* Calls to this method allows the underlying KeyProvider to warm-up any
* implementation specific caches used to store the Encrypted Keys.
* @param keyNames Array of Key Names
+ * @throws IOException thrown if the key material could not be encrypted.
*/
public void warmUpEncryptedKeys(String... keyNames)
throws IOException;
@@ -474,8 +471,9 @@ public void drain(String keyName) {
/**
* This constructor is to be used by sub classes that provide
* delegating/proxying functionality to the {@link KeyProviderCryptoExtension}
- * @param keyProvider
- * @param extension
+ *
+ * @param keyProvider key provider.
+ * @param extension crypto extension.
*/
protected KeyProviderCryptoExtension(KeyProvider keyProvider,
CryptoExtension extension) {
@@ -486,6 +484,7 @@ protected KeyProviderCryptoExtension(KeyProvider keyProvider,
* Notifies the Underlying CryptoExtension implementation to warm up any
* implementation specific caches for the specified KeyVersions
* @param keyNames Arrays of key Names
+ * @throws IOException raised on errors performing I/O.
*/
public void warmUpEncryptedKeys(String... keyNames)
throws IOException {
@@ -557,7 +556,7 @@ public EncryptedKeyVersion reencryptEncryptedKey(EncryptedKeyVersion ekv)
* Calls {@link CryptoExtension#drain(String)} for the given key name on the
* underlying {@link CryptoExtension}.
*
- * @param keyName
+ * @param keyName key name.
*/
public void drain(String keyName) {
getExtension().drain(keyName);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderDelegationTokenExtension.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderDelegationTokenExtension.java
index 1fdc2fe12455b..3c1af424eb7cd 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderDelegationTokenExtension.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyProviderDelegationTokenExtension.java
@@ -48,14 +48,14 @@ public interface DelegationTokenExtension
* Renews the given token.
* @param token The token to be renewed.
* @return The token's lifetime after renewal, or 0 if it can't be renewed.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
long renewDelegationToken(final Token<?> token) throws IOException;
/**
* Cancels the given token.
* @param token The token to be cancelled.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
Void cancelDelegationToken(final Token<?> token) throws IOException;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java
index a75f7d3aa63bd..c18d0d41bc08a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/KeyShell.java
@@ -75,7 +75,7 @@ public class KeyShell extends CommandShell {
*
* @param args Command line arguments.
* @return 0 on success, 1 on failure.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
@Override
protected int init(String[] args) throws IOException {
@@ -547,7 +547,7 @@ private String prettifyException(Exception e) {
* success and 1 for failure.
*
* @param args Command line arguments.
- * @throws Exception
+ * @throws Exception raised on errors performing I/O.
*/
public static void main(String[] args) throws Exception {
int res = ToolRunner.run(new Configuration(), new KeyShell(), args);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java
index 6f8f4585ee75f..f9cc3f4524ff5 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/LoadBalancingKMSClientProvider.java
@@ -21,6 +21,7 @@
import java.io.IOException;
import java.io.InterruptedIOException;
import java.net.ConnectException;
+import java.net.SocketException;
import java.net.URI;
import java.security.GeneralSecurityException;
import java.security.NoSuchAlgorithmException;
@@ -29,7 +30,7 @@
import java.util.List;
import java.util.concurrent.atomic.AtomicInteger;
-import javax.net.ssl.SSLHandshakeException;
+import javax.net.ssl.SSLException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.crypto.key.KeyProvider;
@@ -182,10 +183,10 @@ private T doOp(ProviderCallable op, int currPos,
} catch (IOException ioe) {
LOG.warn("KMS provider at [{}] threw an IOException: ",
provider.getKMSUrl(), ioe);
- // SSLHandshakeException can occur here because of lost connection
+ // SSLException can occur here because of lost connection
// with the KMS server, creating a ConnectException from it,
// so that the FailoverOnNetworkExceptionRetry policy will retry
- if (ioe instanceof SSLHandshakeException) {
+ if (ioe instanceof SSLException || ioe instanceof SocketException) {
Exception cause = ioe;
ioe = new ConnectException("SSLHandshakeException: "
+ cause.getMessage());
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/ValueQueue.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/ValueQueue.java
index be2db05842c8e..65eded918d60d 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/ValueQueue.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/crypto/key/kms/ValueQueue.java
@@ -63,7 +63,7 @@ public interface QueueRefiller {
* @param keyName Key name
* @param keyQueue Queue that needs to be filled
* @param numValues number of Values to be added to the queue.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public void fillQueueForKey(String keyName,
Queue<E> keyQueue, int numValues) throws IOException;
@@ -268,7 +268,7 @@ public ValueQueue(final int numValues, final float lowWaterMark, long expiry,
* Initializes the Value Queues for the provided keys by calling the
* fill Method with "numInitValues" values
* @param keyNames Array of key Names
- * @throws ExecutionException
+ * @throws ExecutionException if the queues cannot be initialized.
*/
public void initializeQueuesForKeys(String... keyNames)
throws ExecutionException {
@@ -285,8 +285,8 @@ public void initializeQueuesForKeys(String... keyNames)
* function to add 1 value to Queue and then drain it.
* @param keyName String key name
* @return E the next value in the Queue
- * @throws IOException
- * @throws ExecutionException
+ * @throws IOException raised on errors performing I/O.
+ * @throws ExecutionException if the value cannot be computed.
*/
public E getNext(String keyName)
throws IOException, ExecutionException {
@@ -344,8 +344,8 @@ public int getSize(String keyName) {
* @param keyName String key name
* @param num Minimum number of values to return.
* @return {@literal List<E>} values returned
- * @throws IOException
- * @throws ExecutionException
+ * @throws IOException raised on errors performing I/O.
+ * @throws ExecutionException if the values cannot be computed.
*/
public List getAtMost(String keyName, int num) throws IOException,
ExecutionException {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java
index d9818b472f0e5..a4737c548c8fa 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AbstractFileSystem.java
@@ -272,7 +272,7 @@ public static AbstractFileSystem get(final URI uri, final Configuration conf)
* @param supportedScheme the scheme supported by the implementor
* @param authorityNeeded if true then theURI must have authority, if false
* then the URI must have null authority.
- *
+ * @param defaultPort default port to use if port is not specified in the URI.
* @throws URISyntaxException uri has syntax error
*/
public AbstractFileSystem(final URI uri, final String supportedScheme,
@@ -281,11 +281,12 @@ public AbstractFileSystem(final URI uri, final String supportedScheme,
myUri = getUri(uri, supportedScheme, authorityNeeded, defaultPort);
statistics = getStatistics(uri);
}
-
+
/**
- * Check that the Uri's scheme matches
- * @param uri
- * @param supportedScheme
+ * Check that the Uri's scheme matches.
+ *
+ * @param uri name URI of the FS.
+ * @param supportedScheme supported scheme.
*/
public void checkScheme(URI uri, String supportedScheme) {
String scheme = uri.getScheme();
@@ -362,7 +363,7 @@ public URI getUri() {
* If the path is fully qualified URI, then its scheme and authority
* matches that of this file system. Otherwise the path must be
* slash-relative name.
- *
+ * @param path the path.
* @throws InvalidPathException if the path is invalid
*/
public void checkPath(Path path) {
@@ -431,7 +432,7 @@ public String getUriPath(final Path p) {
/**
* Make the path fully qualified to this file system
- * @param path
+ * @param path the path.
* @return the qualified path
*/
public Path makeQualified(Path path) {
@@ -496,9 +497,9 @@ public FsServerDefaults getServerDefaults(final Path f) throws IOException {
* through any internal symlinks or mount point
* @param p path to be resolved
* @return fully qualified path
- * @throws FileNotFoundException
- * @throws AccessControlException
- * @throws IOException
+ * @throws FileNotFoundException when the file is not found.
+ * @throws AccessControlException when an access control error occurs.
+ * @throws IOException raised on errors performing I/O.
* @throws UnresolvedLinkException if symbolic link on path cannot be
* resolved internally
*/
@@ -513,6 +514,18 @@ public Path resolvePath(final Path p) throws FileNotFoundException,
* {@link FileContext#create(Path, EnumSet, Options.CreateOpts...)} except
* that the Path f must be fully qualified and the permission is absolute
* (i.e. umask has been applied).
+ *
+ * @param f the path.
+ * @param createFlag create flags.
+ * @param opts create options.
+ * @throws AccessControlException access control exception.
+ * @throws FileAlreadyExistsException file already exists exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws ParentNotDirectoryException parent not directory exception.
+ * @throws UnsupportedFileSystemException unsupported file system exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return output stream.
*/
public final FSDataOutputStream create(final Path f,
final EnumSet<CreateFlag> createFlag, Options.CreateOpts... opts)
@@ -630,6 +643,24 @@ public final FSDataOutputStream create(final Path f,
* The specification of this method matches that of
* {@link #create(Path, EnumSet, Options.CreateOpts...)} except that the opts
* have been declared explicitly.
+ *
+ * @param f the path.
+ * @param flag create flag.
+ * @param absolutePermission absolute permission.
+ * @param bufferSize buffer size.
+ * @param replication replications.
+ * @param blockSize block size.
+ * @param progress progress.
+ * @param checksumOpt checksum option.
+ * @param createParent create parent.
+ * @throws AccessControlException access control exception.
+ * @throws FileAlreadyExistsException file already exists exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws ParentNotDirectoryException parent not directory exception.
+ * @throws UnsupportedFileSystemException unsupported filesystem exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return output stream.
*/
public abstract FSDataOutputStream createInternal(Path f,
EnumSet<CreateFlag> flag, FsPermission absolutePermission,
@@ -644,6 +675,14 @@ public abstract FSDataOutputStream createInternal(Path f,
* {@link FileContext#mkdir(Path, FsPermission, boolean)} except that the Path
* f must be fully qualified and the permission is absolute (i.e.
* umask has been applied).
+ * @param dir directory.
+ * @param permission permission.
+ * @param createParent create parent flag.
+ * @throws AccessControlException access control exception.
+ * @throws FileAlreadyExistsException file already exists exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
*/
public abstract void mkdir(final Path dir, final FsPermission permission,
final boolean createParent) throws AccessControlException,
@@ -654,6 +693,14 @@ public abstract void mkdir(final Path dir, final FsPermission permission,
* The specification of this method matches that of
* {@link FileContext#delete(Path, boolean)} except that Path f must be for
* this file system.
+ *
+ * @param f the path.
+ * @param recursive recursive flag.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return true if the delete succeeded, false otherwise.
*/
public abstract boolean delete(final Path f, final boolean recursive)
throws AccessControlException, FileNotFoundException,
@@ -663,6 +710,13 @@ public abstract boolean delete(final Path f, final boolean recursive)
* The specification of this method matches that of
* {@link FileContext#open(Path)} except that Path f must be for this
* file system.
+ *
+ * @param f the path.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return input stream.
*/
public FSDataInputStream open(final Path f) throws AccessControlException,
FileNotFoundException, UnresolvedLinkException, IOException {
@@ -673,6 +727,14 @@ public FSDataInputStream open(final Path f) throws AccessControlException,
* The specification of this method matches that of
* {@link FileContext#open(Path, int)} except that Path f must be for this
* file system.
+ *
+ * @param f the path.
+ * @param bufferSize buffer size.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return the opened input stream.
*/
public abstract FSDataInputStream open(final Path f, int bufferSize)
throws AccessControlException, FileNotFoundException,
@@ -682,6 +744,14 @@ public abstract FSDataInputStream open(final Path f, int bufferSize)
* The specification of this method matches that of
* {@link FileContext#truncate(Path, long)} except that Path f must be for
* this file system.
+ *
+ * @param f the path.
+ * @param newLength new length.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return true if the file was truncated to the requested length, false otherwise.
*/
public boolean truncate(Path f, long newLength)
throws AccessControlException, FileNotFoundException,
@@ -694,6 +764,14 @@ public boolean truncate(Path f, long newLength)
* The specification of this method matches that of
* {@link FileContext#setReplication(Path, short)} except that Path f must be
* for this file system.
+ *
+ * @param f the path.
+ * @param replication replication.
+ * @return true if the replication was set successfully, false otherwise.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
*/
public abstract boolean setReplication(final Path f,
final short replication) throws AccessControlException,
@@ -703,6 +781,16 @@ public abstract boolean setReplication(final Path f,
* The specification of this method matches that of
* {@link FileContext#rename(Path, Path, Options.Rename...)} except that Path
* f must be for this file system.
+ *
+ * @param src src.
+ * @param dst dst.
+ * @param options options.
+ * @throws AccessControlException access control exception.
+ * @throws FileAlreadyExistsException file already exists exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws ParentNotDirectoryException parent not directory exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
*/
public final void rename(final Path src, final Path dst,
final Options.Rename... options) throws AccessControlException,
@@ -727,6 +815,15 @@ public final void rename(final Path src, final Path dst,
* File systems that do not have a built in overwrite need implement only this
* method and can take advantage of the default impl of the other
* {@link #renameInternal(Path, Path, boolean)}
+ *
+ * @param src src.
+ * @param dst dst.
+ * @throws AccessControlException access control exception.
+ * @throws FileAlreadyExistsException file already exists exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws ParentNotDirectoryException parent not directory exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
*/
public abstract void renameInternal(final Path src, final Path dst)
throws AccessControlException, FileAlreadyExistsException,
@@ -737,6 +834,16 @@ public abstract void renameInternal(final Path src, final Path dst)
* The specification of this method matches that of
* {@link FileContext#rename(Path, Path, Options.Rename...)} except that Path
* f must be for this file system.
+ *
+ * @param src src.
+ * @param dst dst.
+ * @param overwrite overwrite flag.
+ * @throws AccessControlException access control exception.
+ * @throws FileAlreadyExistsException file already exists exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws ParentNotDirectoryException parent not directory exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
*/
public void renameInternal(final Path src, final Path dst,
boolean overwrite) throws AccessControlException,
@@ -800,6 +907,12 @@ public boolean supportsSymlinks() {
/**
* The specification of this method matches that of
* {@link FileContext#createSymlink(Path, Path, boolean)};
+ *
+ * @param target target.
+ * @param link link.
+ * @param createParent create parent.
+ * @throws IOException raised on errors performing I/O.
+ * @throws UnresolvedLinkException unresolved link exception.
*/
public void createSymlink(final Path target, final Path link,
final boolean createParent) throws IOException, UnresolvedLinkException {
@@ -810,6 +923,8 @@ public void createSymlink(final Path target, final Path link,
* Partially resolves the path. This is used during symlink resolution in
* {@link FSLinkResolver}, and differs from the similarly named method
* {@link FileContext#getLinkTarget(Path)}.
+ * @param f the path.
+ * @return target path.
* @throws IOException subclass implementations may throw IOException
*/
public Path getLinkTarget(final Path f) throws IOException {
@@ -822,6 +937,13 @@ public Path getLinkTarget(final Path f) throws IOException {
* The specification of this method matches that of
* {@link FileContext#setPermission(Path, FsPermission)} except that Path f
* must be for this file system.
+ *
+ * @param f the path.
+ * @param permission permission.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
*/
public abstract void setPermission(final Path f,
final FsPermission permission) throws AccessControlException,
@@ -831,6 +953,14 @@ public abstract void setPermission(final Path f,
* The specification of this method matches that of
* {@link FileContext#setOwner(Path, String, String)} except that Path f must
* be for this file system.
+ *
+ * @param f the path.
+ * @param username username.
+ * @param groupname groupname.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
*/
public abstract void setOwner(final Path f, final String username,
final String groupname) throws AccessControlException,
@@ -840,6 +970,14 @@ public abstract void setOwner(final Path f, final String username,
* The specification of this method matches that of
* {@link FileContext#setTimes(Path, long, long)} except that Path f must be
* for this file system.
+ *
+ * @param f the path.
+ * @param mtime modify time.
+ * @param atime access time.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
*/
public abstract void setTimes(final Path f, final long mtime,
final long atime) throws AccessControlException, FileNotFoundException,
@@ -849,6 +987,13 @@ public abstract void setTimes(final Path f, final long mtime,
* The specification of this method matches that of
* {@link FileContext#getFileChecksum(Path)} except that Path f must be for
* this file system.
+ *
+ * @param f the path.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return the file checksum.
*/
public abstract FileChecksum getFileChecksum(final Path f)
throws AccessControlException, FileNotFoundException,
@@ -859,6 +1004,13 @@ public abstract FileChecksum getFileChecksum(final Path f)
* {@link FileContext#getFileStatus(Path)}
* except that an UnresolvedLinkException may be thrown if a symlink is
* encountered in the path.
+ *
+ * @param f the path.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return the file status.
*/
public abstract FileStatus getFileStatus(final Path f)
throws AccessControlException, FileNotFoundException,
@@ -870,8 +1022,8 @@ public abstract FileStatus getFileStatus(final Path f)
* In some FileSystem implementations such as HDFS metadata
* synchronization is essential to guarantee consistency of read requests
* particularly in HA setting.
- * @throws IOException
- * @throws UnsupportedOperationException
+ * @throws IOException raised on errors performing I/O.
+ * @throws UnsupportedOperationException if the operation is unsupported.
*/
public void msync() throws IOException, UnsupportedOperationException {
throw new UnsupportedOperationException(getClass().getCanonicalName() +
@@ -883,6 +1035,13 @@ public void msync() throws IOException, UnsupportedOperationException {
* {@link FileContext#access(Path, FsAction)}
* except that an UnresolvedLinkException may be thrown if a symlink is
* encountered in the path.
+ *
+ * @param path the path.
+ * @param mode fsaction mode.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
*/
@InterfaceAudience.LimitedPrivate({"HDFS", "Hive"})
public void access(Path path, FsAction mode) throws AccessControlException,
@@ -897,6 +1056,13 @@ public void access(Path path, FsAction mode) throws AccessControlException,
* encountered in the path leading up to the final path component.
* If the file system does not support symlinks then the behavior is
* equivalent to {@link AbstractFileSystem#getFileStatus(Path)}.
+ *
+ * @param f the path.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnsupportedFileSystemException unsupported file system exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return file status.
*/
public FileStatus getFileLinkStatus(final Path f)
throws AccessControlException, FileNotFoundException,
@@ -908,6 +1074,15 @@ public FileStatus getFileLinkStatus(final Path f)
* The specification of this method matches that of
* {@link FileContext#getFileBlockLocations(Path, long, long)} except that
* Path f must be for this file system.
+ *
+ * @param f the path.
+ * @param start start.
+ * @param len length.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return an array of block locations.
*/
public abstract BlockLocation[] getFileBlockLocations(final Path f,
final long start, final long len) throws AccessControlException,
@@ -917,6 +1092,13 @@ public abstract BlockLocation[] getFileBlockLocations(final Path f,
* The specification of this method matches that of
* {@link FileContext#getFsStatus(Path)} except that Path f must be for this
* file system.
+ *
+ * @param f the path.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return the file system status.
*/
public FsStatus getFsStatus(final Path f) throws AccessControlException,
FileNotFoundException, UnresolvedLinkException, IOException {
@@ -927,6 +1109,11 @@ public FsStatus getFsStatus(final Path f) throws AccessControlException,
/**
* The specification of this method matches that of
* {@link FileContext#getFsStatus(Path)}.
+ *
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return the file system status.
*/
public abstract FsStatus getFsStatus() throws AccessControlException,
FileNotFoundException, IOException;
@@ -935,6 +1122,13 @@ public abstract FsStatus getFsStatus() throws AccessControlException,
* The specification of this method matches that of
* {@link FileContext#listStatus(Path)} except that Path f must be for this
* file system.
+ *
+ * @param f path.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return FileStatus Iterator.
*/
public RemoteIterator<FileStatus> listStatusIterator(final Path f)
throws AccessControlException, FileNotFoundException,
@@ -967,6 +1161,13 @@ public FileStatus next() {
* will have different formats for replicated and erasure coded file. Please
* refer to {@link FileSystem#getFileBlockLocations(FileStatus, long, long)}
* for more details.
+ *
+ * @param f the path.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return an iterator over the located file statuses.
*/
public RemoteIterator<LocatedFileStatus> listLocatedStatus(final Path f)
throws AccessControlException, FileNotFoundException,
@@ -999,6 +1200,12 @@ public LocatedFileStatus next() throws IOException {
* The specification of this method matches that of
* {@link FileContext.Util#listStatus(Path)} except that Path f must be
* for this file system.
+ * @param f the path.
+ * @throws AccessControlException access control exception.
+ * @throws FileNotFoundException file not found exception.
+ * @throws UnresolvedLinkException unresolved link exception.
+ * @throws IOException raised on errors performing I/O.
+ * @return an array of file statuses.
*/
public abstract FileStatus[] listStatus(final Path f)
throws AccessControlException, FileNotFoundException,
@@ -1007,7 +1214,8 @@ public abstract FileStatus[] listStatus(final Path f)
/**
* @return an iterator over the corrupt files under the given path
* (may contain duplicates if a file has more than one corrupt block)
- * @throws IOException
+ * @param path the path.
+ * @throws IOException raised on errors performing I/O.
*/
public RemoteIterator<Path> listCorruptFileBlocks(Path path)
throws IOException {
@@ -1020,6 +1228,10 @@ public RemoteIterator<Path> listCorruptFileBlocks(Path path)
* The specification of this method matches that of
* {@link FileContext#setVerifyChecksum(boolean, Path)} except that Path f
* must be for this file system.
+ *
+ * @param verifyChecksum verify checksum flag.
+ * @throws AccessControlException access control exception.
+ * @throws IOException raised on errors performing I/O.
*/
public abstract void setVerifyChecksum(final boolean verifyChecksum)
throws AccessControlException, IOException;
@@ -1041,7 +1253,7 @@ public String getCanonicalServiceName() {
* @param renewer the account name that is allowed to renew the token.
* @return List of delegation tokens.
* If delegation tokens not supported then return a list of size zero.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
@InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" })
public List<Token<?>> getDelegationTokens(String renewer) throws IOException {
@@ -1141,7 +1353,7 @@ public AclStatus getAclStatus(Path path) throws IOException {
* @param path Path to modify
* @param name xattr name.
* @param value xattr value.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public void setXAttr(Path path, String name, byte[] value)
throws IOException {
@@ -1160,7 +1372,7 @@ public void setXAttr(Path path, String name, byte[] value)
* @param name xattr name.
* @param value xattr value.
* @param flag xattr set flag
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public void setXAttr(Path path, String name, byte[] value,
EnumSet<XAttrSetFlag> flag) throws IOException {
@@ -1178,7 +1390,7 @@ public void setXAttr(Path path, String name, byte[] value,
* @param path Path to get extended attribute
* @param name xattr name.
* @return byte[] xattr value.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public byte[] getXAttr(Path path, String name) throws IOException {
throw new UnsupportedOperationException(getClass().getSimpleName()
@@ -1196,7 +1408,7 @@ public byte[] getXAttr(Path path, String name) throws IOException {
*
* @return {@literal Map<String, byte[]>} describing the XAttrs of the file
* or directory
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public Map<String, byte[]> getXAttrs(Path path) throws IOException {
throw new UnsupportedOperationException(getClass().getSimpleName()
@@ -1214,7 +1426,7 @@ public Map getXAttrs(Path path) throws IOException {
* @param names XAttr names.
* @return {@literal Map<String, byte[]>} describing the XAttrs of the file
* or directory
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public Map<String, byte[]> getXAttrs(Path path, List<String> names)
throws IOException {
@@ -1232,7 +1444,7 @@ public Map getXAttrs(Path path, List names)
* @param path Path to get extended attributes
* @return {@literal Map<String, byte[]>} describing the XAttrs of the file
* or directory
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public List<String> listXAttrs(Path path)
throws IOException {
@@ -1249,7 +1461,7 @@ public List listXAttrs(Path path)
*
* @param path Path to remove extended attribute
* @param name xattr name
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public void removeXAttr(Path path, String name) throws IOException {
throw new UnsupportedOperationException(getClass().getSimpleName()
@@ -1259,6 +1471,11 @@ public void removeXAttr(Path path, String name) throws IOException {
/**
* The specification of this method matches that of
* {@link FileContext#createSnapshot(Path, String)}.
+ *
+ * @param path the path.
+ * @param snapshotName snapshot name.
+ * @throws IOException raised on errors performing I/O.
+ * @return the path of the created snapshot.
*/
public Path createSnapshot(final Path path, final String snapshotName)
throws IOException {
@@ -1269,6 +1486,11 @@ public Path createSnapshot(final Path path, final String snapshotName)
/**
* The specification of this method matches that of
* {@link FileContext#renameSnapshot(Path, String, String)}.
+ *
+ * @param path the path.
+ * @param snapshotOldName snapshot old name.
+ * @param snapshotNewName snapshot new name.
+ * @throws IOException raised on errors performing I/O.
*/
public void renameSnapshot(final Path path, final String snapshotOldName,
final String snapshotNewName) throws IOException {
@@ -1279,6 +1501,10 @@ public void renameSnapshot(final Path path, final String snapshotOldName,
/**
* The specification of this method matches that of
* {@link FileContext#deleteSnapshot(Path, String)}.
+ *
+ * @param snapshotDir snapshot dir.
+ * @param snapshotName snapshot name.
+ * @throws IOException raised on errors performing I/O.
*/
public void deleteSnapshot(final Path snapshotDir, final String snapshotName)
throws IOException {
@@ -1289,7 +1515,7 @@ public void deleteSnapshot(final Path snapshotDir, final String snapshotName)
/**
* Set the source path to satisfy storage policy.
* @param path The source path referring to either a directory or a file.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public void satisfyStoragePolicy(final Path path) throws IOException {
throw new UnsupportedOperationException(
@@ -1303,6 +1529,7 @@ public void satisfyStoragePolicy(final Path path) throws IOException {
* @param policyName the name of the target storage policy. The list
* of supported Storage policies can be retrieved
* via {@link #getAllStoragePolicies}.
+ * @throws IOException raised on errors performing I/O.
*/
public void setStoragePolicy(final Path path, final String policyName)
throws IOException {
@@ -1314,7 +1541,7 @@ public void setStoragePolicy(final Path path, final String policyName)
/**
* Unset the storage policy set for a given file or directory.
* @param src file or directory path.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public void unsetStoragePolicy(final Path src) throws IOException {
throw new UnsupportedOperationException(getClass().getSimpleName()
@@ -1326,7 +1553,7 @@ public void unsetStoragePolicy(final Path src) throws IOException {
*
* @param src file or directory path.
* @return storage policy for give file.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public BlockStoragePolicySpi getStoragePolicy(final Path src)
throws IOException {
@@ -1338,7 +1565,7 @@ public BlockStoragePolicySpi getStoragePolicy(final Path src)
* Retrieve all the storage policies supported by this file system.
*
* @return all storage policies supported by this filesystem.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public Collection<? extends BlockStoragePolicySpi> getAllStoragePolicies()
throws IOException {
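
Several of the methods documented above hand results back through a RemoteIterator; a minimal sketch of consuming listStatusIterator(), assuming an AbstractFileSystem "afs" and a directory Path "dir" are in scope (hasNext() and next() may throw IOException):

    RemoteIterator<FileStatus> entries = afs.listStatusIterator(dir);
    while (entries.hasNext()) {
      FileStatus status = entries.next();
      // use status.getPath(), status.getLen(), ...
    }
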
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AvroFSInput.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AvroFSInput.java
index b4a4a85674dfa..7518dd2f7ef74 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AvroFSInput.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/AvroFSInput.java
@@ -25,6 +25,10 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
+import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY;
+import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL;
+import static org.apache.hadoop.util.functional.FutureIO.awaitFuture;
+
/** Adapts an {@link FSDataInputStream} to Avro's SeekableInput interface. */
@InterfaceAudience.Public
@InterfaceStability.Stable
@@ -32,17 +36,31 @@ public class AvroFSInput implements Closeable, SeekableInput {
private final FSDataInputStream stream;
private final long len;
- /** Construct given an {@link FSDataInputStream} and its length. */
+ /**
+ * Construct given an {@link FSDataInputStream} and its length.
+ *
+ * @param in inputstream.
+ * @param len len.
+ */
public AvroFSInput(final FSDataInputStream in, final long len) {
this.stream = in;
this.len = len;
}
- /** Construct given a {@link FileContext} and a {@link Path}. */
+ /** Construct given a {@link FileContext} and a {@link Path}.
+ * @param fc filecontext.
+ * @param p the path.
+ * @throws IOException If an I/O error occurred.
+ * */
public AvroFSInput(final FileContext fc, final Path p) throws IOException {
FileStatus status = fc.getFileStatus(p);
this.len = status.getLen();
- this.stream = fc.open(p);
+ this.stream = awaitFuture(fc.openFile(p)
+ .opt(FS_OPTION_OPENFILE_READ_POLICY,
+ FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL)
+ .withFileStatus(status)
+ .build());
}
@Override
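
The constructor change above replaces the blocking fc.open(p) with the openFile() builder and a sequential read policy. A hedged sketch of the same pattern against a FileSystem, assuming a FileSystem "fs" and Path "path" are in scope; awaitFuture and the option constants are the same imports added above:

    FSDataInputStream in = awaitFuture(
        fs.openFile(path)
            .opt(FS_OPTION_OPENFILE_READ_POLICY,
                FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL)
            .build());
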
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BatchedRemoteIterator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BatchedRemoteIterator.java
index 607fffbcc701a..e693bcbfe89fc 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BatchedRemoteIterator.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BatchedRemoteIterator.java
@@ -68,6 +68,7 @@ public BatchedRemoteIterator(K prevKey) {
*
* @param prevKey The key to send.
* @return A list of replies.
+ * @throws IOException If an I/O error occurred.
*/
public abstract BatchedEntries<E> makeRequest(K prevKey) throws IOException;
@@ -102,6 +103,8 @@ public boolean hasNext() throws IOException {
/**
* Return the next list key associated with an element.
+ * @param element element.
+ * @return K Generics Type.
*/
public abstract K elementToPrevKey(E element);
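
A rough sketch of how the two abstract methods cooperate: makeRequest() fetches one page of results keyed by the previous element, and elementToPrevKey() tells the iterator where the next page starts. EntryInfo and fetchPage() are purely illustrative names; BatchedListEntries is the list-backed BatchedEntries implementation nested in BatchedRemoteIterator.

    class EntryIterator extends BatchedRemoteIterator<String, EntryInfo> {
      EntryIterator() {
        super("");   // start before the first key
      }

      @Override
      public BatchedEntries<EntryInfo> makeRequest(String prevKey) throws IOException {
        List<EntryInfo> page = fetchPage(prevKey);   // hypothetical paged RPC
        // report hasMore = true while pages keep coming back non-empty
        return new BatchedListEntries<>(page, !page.isEmpty());
      }

      @Override
      public String elementToPrevKey(EntryInfo element) {
        return element.getName();
      }
    }
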
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java
index 29358dd7d1086..67687c1f0e04c 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BlockLocation.java
@@ -85,6 +85,7 @@ public BlockLocation() {
/**
* Copy constructor.
+ * @param that blocklocation.
*/
public BlockLocation(BlockLocation that) {
this.hosts = that.hosts;
@@ -100,6 +101,10 @@ public BlockLocation(BlockLocation that) {
/**
* Constructor with host, name, offset and length.
+ * @param names names array.
+ * @param hosts host array.
+ * @param offset offset.
+ * @param length length.
*/
public BlockLocation(String[] names, String[] hosts, long offset,
long length) {
@@ -108,6 +113,11 @@ public BlockLocation(String[] names, String[] hosts, long offset,
/**
* Constructor with host, name, offset, length and corrupt flag.
+ * @param names names.
+ * @param hosts hosts.
+ * @param offset offset.
+ * @param length length.
+ * @param corrupt corrupt.
*/
public BlockLocation(String[] names, String[] hosts, long offset,
long length, boolean corrupt) {
@@ -116,6 +126,11 @@ public BlockLocation(String[] names, String[] hosts, long offset,
/**
* Constructor with host, name, network topology, offset and length.
+ * @param names names.
+ * @param hosts hosts.
+ * @param topologyPaths topologyPaths.
+ * @param offset offset.
+ * @param length length.
*/
public BlockLocation(String[] names, String[] hosts, String[] topologyPaths,
long offset, long length) {
@@ -125,6 +140,12 @@ public BlockLocation(String[] names, String[] hosts, String[] topologyPaths,
/**
* Constructor with host, name, network topology, offset, length
* and corrupt flag.
+ * @param names names.
+ * @param hosts hosts.
+ * @param topologyPaths topologyPaths.
+ * @param offset offset.
+ * @param length length.
+ * @param corrupt corrupt.
*/
public BlockLocation(String[] names, String[] hosts, String[] topologyPaths,
long offset, long length, boolean corrupt) {
@@ -177,6 +198,8 @@ public BlockLocation(String[] names, String[] hosts, String[] cachedHosts,
/**
* Get the list of hosts (hostname) hosting this block.
+ * @return hosts array.
+ * @throws IOException If an I/O error occurred.
*/
public String[] getHosts() throws IOException {
return hosts;
@@ -184,6 +207,7 @@ public String[] getHosts() throws IOException {
/**
* Get the list of hosts (hostname) hosting a cached replica of the block.
+ * @return cached hosts.
*/
public String[] getCachedHosts() {
return cachedHosts;
@@ -191,6 +215,8 @@ public String[] getCachedHosts() {
/**
* Get the list of names (IP:xferPort) hosting this block.
+ * @return names array.
+ * @throws IOException If an I/O error occurred.
*/
public String[] getNames() throws IOException {
return names;
@@ -199,6 +225,8 @@ public String[] getNames() throws IOException {
/**
* Get the list of network topology paths for each of the hosts.
* The last component of the path is the "name" (IP:xferPort).
+ * @return topology paths.
+ * @throws IOException If an I/O error occurred.
*/
public String[] getTopologyPaths() throws IOException {
return topologyPaths;
@@ -206,6 +234,7 @@ public String[] getTopologyPaths() throws IOException {
/**
* Get the storageID of each replica of the block.
+ * @return storage ids.
*/
public String[] getStorageIds() {
return storageIds;
@@ -213,6 +242,7 @@ public String[] getStorageIds() {
/**
* Get the storage type of each replica of the block.
+ * @return storage type of each replica of the block.
*/
public StorageType[] getStorageTypes() {
return storageTypes;
@@ -220,6 +250,7 @@ public StorageType[] getStorageTypes() {
/**
* Get the start offset of file associated with this block.
+ * @return start offset of file associated with this block.
*/
public long getOffset() {
return offset;
@@ -227,6 +258,7 @@ public long getOffset() {
/**
* Get the length of the block.
+ * @return length of the block.
*/
public long getLength() {
return length;
@@ -234,6 +266,7 @@ public long getLength() {
/**
* Get the corrupt flag.
+ * @return corrupt flag.
*/
public boolean isCorrupt() {
return corrupt;
@@ -241,6 +274,7 @@ public boolean isCorrupt() {
/**
* Return true if the block is striped (erasure coded).
+ * @return true if the block is striped (erasure coded), false otherwise.
*/
public boolean isStriped() {
return false;
@@ -248,6 +282,7 @@ public boolean isStriped() {
/**
* Set the start offset of file associated with this block.
+ * @param offset start offset.
*/
public void setOffset(long offset) {
this.offset = offset;
@@ -255,6 +290,7 @@ public void setOffset(long offset) {
/**
* Set the length of block.
+ * @param length length of block.
*/
public void setLength(long length) {
this.length = length;
@@ -262,6 +298,7 @@ public void setLength(long length) {
/**
* Set the corrupt flag.
+ * @param corrupt corrupt flag.
*/
public void setCorrupt(boolean corrupt) {
this.corrupt = corrupt;
@@ -269,6 +306,8 @@ public void setCorrupt(boolean corrupt) {
/**
* Set the hosts hosting this block.
+ * @param hosts hosts array.
+ * @throws IOException If an I/O error occurred.
*/
public void setHosts(String[] hosts) throws IOException {
if (hosts == null) {
@@ -280,6 +319,7 @@ public void setHosts(String[] hosts) throws IOException {
/**
* Set the hosts hosting a cached replica of this block.
+ * @param cachedHosts cached hosts.
*/
public void setCachedHosts(String[] cachedHosts) {
if (cachedHosts == null) {
@@ -291,6 +331,8 @@ public void setCachedHosts(String[] cachedHosts) {
/**
* Set the names (host:port) hosting this block.
+ * @param names names.
+ * @throws IOException If an I/O error occurred.
*/
public void setNames(String[] names) throws IOException {
if (names == null) {
@@ -302,6 +344,9 @@ public void setNames(String[] names) throws IOException {
/**
* Set the network topology paths of the hosts.
+ *
+ * @param topologyPaths topology paths.
+ * @throws IOException If an I/O error occurred.
*/
public void setTopologyPaths(String[] topologyPaths) throws IOException {
if (topologyPaths == null) {
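
For reference, the four-argument constructor documented above is typically used as below; the host name and transfer port are illustrative:

    String[] names = {"10.0.0.1:9866"};
    String[] hosts = {"datanode1.example.com"};
    BlockLocation location = new BlockLocation(names, hosts, 0L, 128 * 1024 * 1024L);
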
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BufferedFSInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BufferedFSInputStream.java
index 59345f5d25caf..7f3171235c8f4 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BufferedFSInputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/BufferedFSInputStream.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -22,6 +22,9 @@
import java.io.FileDescriptor;
import java.io.IOException;
import java.util.StringJoiner;
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.function.IntFunction;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@@ -158,8 +161,24 @@ public IOStatistics getIOStatistics() {
@Override
public String toString() {
return new StringJoiner(", ",
- BufferedFSInputStream.class.getSimpleName() + "[", "]")
- .add("in=" + in)
- .toString();
+ BufferedFSInputStream.class.getSimpleName() + "[", "]")
+ .add("in=" + in)
+ .toString();
+ }
+
+ @Override
+ public int minSeekForVectorReads() {
+ return ((PositionedReadable) in).minSeekForVectorReads();
+ }
+
+ @Override
+ public int maxReadSizeForVectorReads() {
+ return ((PositionedReadable) in).maxReadSizeForVectorReads();
+ }
+
+ @Override
+ public void readVectored(List<? extends FileRange> ranges,
+ IntFunction<ByteBuffer> allocate) throws IOException {
+ ((PositionedReadable) in).readVectored(ranges, allocate);
}
}
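
BufferedFSInputStream now simply forwards vectored reads to the wrapped stream. A short caller-side sketch, assuming an FSDataInputStream "in" over a sufficiently large file:

    List<FileRange> ranges = Arrays.asList(
        FileRange.createFileRange(0, 4096),              // first 4 KB
        FileRange.createFileRange(1024 * 1024, 8192));   // 8 KB starting at 1 MB
    in.readVectored(ranges, ByteBuffer::allocate);
    for (FileRange range : ranges) {
      ByteBuffer data = FutureIO.awaitFuture(range.getData());
      // consume the buffer for this range
    }
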
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ByteBufferUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ByteBufferUtil.java
index 6576fe5827d94..f577649dd5fce 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ByteBufferUtil.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ByteBufferUtil.java
@@ -47,6 +47,12 @@ private static boolean streamHasByteBufferRead(InputStream stream) {
/**
* Perform a fallback read.
+ *
+ * @param stream input stream.
+ * @param bufferPool bufferPool.
+ * @param maxLength maxLength.
+ * @throws IOException raised on errors performing I/O.
+ * @return byte buffer.
*/
public static ByteBuffer fallbackRead(
InputStream stream, ByteBufferPool bufferPool, int maxLength)
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CachingGetSpaceUsed.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CachingGetSpaceUsed.java
index 58dc82d2efb2d..d7b61346d4e3b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CachingGetSpaceUsed.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CachingGetSpaceUsed.java
@@ -19,6 +19,7 @@
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
+import org.apache.hadoop.classification.VisibleForTesting;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
@@ -52,6 +53,9 @@ public abstract class CachingGetSpaceUsed implements Closeable, GetSpaceUsed {
/**
* This is the constructor used by the builder.
* All overriding classes should implement this.
+ *
+ * @param builder builder.
+ * @throws IOException raised on errors performing I/O.
*/
public CachingGetSpaceUsed(CachingGetSpaceUsed.Builder builder)
throws IOException {
@@ -89,19 +93,19 @@ void init() {
if (!shouldFirstRefresh) {
// Skip initial refresh operation, so we need to do first refresh
// operation immediately in refresh thread.
- initRefeshThread(true);
+ initRefreshThread(true);
return;
}
refresh();
}
- initRefeshThread(false);
+ initRefreshThread(false);
}
/**
* RunImmediately should set true, if we skip the first refresh.
* @param runImmediately The param default should be false.
*/
- private void initRefeshThread (boolean runImmediately) {
+ private void initRefreshThread(boolean runImmediately) {
if (refreshInterval > 0) {
refreshUsed = new Thread(new RefreshThread(this, runImmediately),
"refreshUsed-" + dirPath);
@@ -139,6 +143,8 @@ public String getDirPath() {
/**
* Increment the cached value of used space.
+ *
+ * @param value dfs used value.
*/
public void incDfsUsed(long value) {
used.addAndGet(value);
@@ -153,11 +159,25 @@ boolean running() {
/**
* How long in between runs of the background refresh.
+ *
+ * @return refresh interval.
*/
- long getRefreshInterval() {
+ @VisibleForTesting
+ public long getRefreshInterval() {
return refreshInterval;
}
+ /**
+ * Randomize the refresh interval timing by this amount, the actual interval will be chosen
+ * uniformly between {@code interval-jitter} and {@code interval+jitter}.
+ *
+ * @return the refresh interval jitter.
+ */
+ @VisibleForTesting
+ public long getJitter() {
+ return jitter;
+ }
+
/**
* Reset the current used data amount. This should be called
* when the cached value is re-computed.
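
A hedged sketch of building a refreshing space-usage probe through GetSpaceUsed.Builder, which feeds the constructor documented above; the setter names used here are assumptions based on that builder:

    GetSpaceUsed usage = new GetSpaceUsed.Builder()
        .setPath(new File("/data/dfs/dn"))
        .setInterval(60_000L)    // refresh once a minute
        .setJitter(10_000L)      // randomize each interval by up to +/- 10 seconds
        .build();
    long bytes = usage.getUsed();
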
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
index c7f8e36c3f675..4c7569d6ecd81 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFileSystem.java
@@ -22,18 +22,24 @@
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
+import java.nio.ByteBuffer;
+import java.nio.IntBuffer;
import java.nio.channels.ClosedChannelException;
+import java.util.ArrayList;
import java.util.Arrays;
-import java.util.Collections;
import java.util.EnumSet;
import java.util.List;
import java.util.concurrent.CompletableFuture;
+import java.util.concurrent.CompletionException;
+import java.util.function.IntFunction;
+import java.util.zip.CRC32;
import org.apache.hadoop.util.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.impl.AbstractFSBuilderImpl;
+import org.apache.hadoop.fs.impl.CombinedFileRange;
import org.apache.hadoop.fs.impl.FutureDataInputStreamBuilderImpl;
import org.apache.hadoop.fs.impl.OpenFileParameters;
import org.apache.hadoop.fs.permission.AclEntry;
@@ -45,8 +51,10 @@
import org.apache.hadoop.util.LambdaUtils;
import org.apache.hadoop.util.Progressable;
+import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS;
import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
import static org.apache.hadoop.fs.impl.StoreImplementationUtils.isProbeForSyncable;
+import static org.apache.hadoop.fs.VectoredReadUtils.sortRanges;
/****************************************************************
* Abstract Checksumed FileSystem.
@@ -66,7 +74,7 @@ public abstract class ChecksumFileSystem extends FilterFileSystem {
public static double getApproxChkSumLength(long size) {
return ChecksumFSOutputSummer.CHKSUM_AS_FRACTION * size;
}
-
+
public ChecksumFileSystem(FileSystem fs) {
super(fs);
}
@@ -82,7 +90,7 @@ public void setConf(Configuration conf) {
bytesPerChecksum);
}
}
-
+
/**
* Set whether to verify checksum.
*/
@@ -95,32 +103,51 @@ public void setVerifyChecksum(boolean verifyChecksum) {
public void setWriteChecksum(boolean writeChecksum) {
this.writeChecksum = writeChecksum;
}
-
+
/** get the raw file system */
@Override
public FileSystem getRawFileSystem() {
return fs;
}
- /** Return the name of the checksum file associated with a file.*/
+ /**
+ * Return the name of the checksum file associated with a file.
+ *
+ * @param file the file path.
+ * @return name of the checksum file associated with a file.
+ */
public Path getChecksumFile(Path file) {
return new Path(file.getParent(), "." + file.getName() + ".crc");
}
- /** Return true iff file is a checksum file name.*/
+ /**
+ * Return true if file is a checksum file name.
+ *
+ * @param file the file path.
+ * @return true if the file is a checksum file, false otherwise.
+ */
public static boolean isChecksumFile(Path file) {
String name = file.getName();
return name.startsWith(".") && name.endsWith(".crc");
}
- /** Return the length of the checksum file given the size of the
+ /**
+ * Return the length of the checksum file given the size of the
* actual file.
- **/
+ *
+ * @param file the file path.
+ * @param fileSize file size.
+ * @return checksum length.
+ */
public long getChecksumFileLength(Path file, long fileSize) {
return getChecksumLength(fileSize, getBytesPerSum());
}
- /** Return the bytes Per Checksum */
+ /**
+ * Return the bytes Per Checksum.
+ *
+ * @return bytes per checksum.
+ */
public int getBytesPerSum() {
return bytesPerChecksum;
}
@@ -139,22 +166,23 @@ private int getSumBufferSize(int bytesPerSum, int bufferSize) {
* It verifies that data matches checksums.
*******************************************************/
private static class ChecksumFSInputChecker extends FSInputChecker implements
- IOStatisticsSource {
+ IOStatisticsSource, StreamCapabilities {
private ChecksumFileSystem fs;
private FSDataInputStream datas;
private FSDataInputStream sums;
-
+
private static final int HEADER_LENGTH = 8;
-
+
private int bytesPerSum = 1;
-
+ private long fileLen = -1L;
+
public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file)
throws IOException {
this(fs, file, fs.getConf().getInt(
- LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_KEY,
+ LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_KEY,
LocalFileSystemConfigKeys.LOCAL_FS_STREAM_BUFFER_SIZE_DEFAULT));
}
-
+
public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize)
throws IOException {
super( file, fs.getFileStatus(file).getReplication() );
@@ -170,7 +198,8 @@ public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize)
if (!Arrays.equals(version, CHECKSUM_VERSION))
throw new IOException("Not a checksum file: "+sumFile);
this.bytesPerSum = sums.readInt();
- set(fs.verifyChecksum, DataChecksum.newCrc32(), bytesPerSum, 4);
+ set(fs.verifyChecksum, DataChecksum.newCrc32(), bytesPerSum,
+ FSInputChecker.CHECKSUM_SIZE);
} catch (IOException e) {
// mincing the message is terrible, but java throws permission
// exceptions as FNF because that's all the method signatures allow!
@@ -182,21 +211,21 @@ public ChecksumFSInputChecker(ChecksumFileSystem fs, Path file, int bufferSize)
set(fs.verifyChecksum, null, 1, 0);
}
}
-
+
private long getChecksumFilePos( long dataPos ) {
- return HEADER_LENGTH + 4*(dataPos/bytesPerSum);
+ return HEADER_LENGTH + FSInputChecker.CHECKSUM_SIZE*(dataPos/bytesPerSum);
}
-
+
@Override
protected long getChunkPosition( long dataPos ) {
return dataPos/bytesPerSum*bytesPerSum;
}
-
+
@Override
public int available() throws IOException {
return datas.available() + super.available();
}
-
+
@Override
public int read(long position, byte[] b, int off, int len)
throws IOException {
@@ -214,7 +243,7 @@ public int read(long position, byte[] b, int off, int len)
}
return nread;
}
-
+
@Override
public void close() throws IOException {
datas.close();
@@ -223,7 +252,7 @@ public void close() throws IOException {
}
set(fs.verifyChecksum, null, 1, 0);
}
-
+
@Override
public boolean seekToNewSource(long targetPos) throws IOException {
@@ -246,7 +275,7 @@ protected int readChunk(long pos, byte[] buf, int offset, int len,
final int checksumsToRead = Math.min(
len/bytesPerSum, // number of checksums based on len to read
checksum.length / CHECKSUM_SIZE); // size of checksum buffer
- long checksumPos = getChecksumFilePos(pos);
+ long checksumPos = getChecksumFilePos(pos);
if(checksumPos != sums.getPos()) {
sums.seek(checksumPos);
}
@@ -286,8 +315,187 @@ protected int readChunk(long pos, byte[] buf, int offset, int len,
public IOStatistics getIOStatistics() {
return IOStatisticsSupport.retrieveIOStatistics(datas);
}
+
+ public static long findChecksumOffset(long dataOffset,
+ int bytesPerSum) {
+ return HEADER_LENGTH + (dataOffset/bytesPerSum) * FSInputChecker.CHECKSUM_SIZE;
+ }
+
+ /**
+ * Calculate length of file if not already cached.
+ * @return file length.
+ * @throws IOException any IOE.
+ */
+ private long getFileLength() throws IOException {
+ if (fileLen == -1L) {
+ fileLen = fs.getFileStatus(file).getLen();
+ }
+ return fileLen;
+ }
+
+ /**
+ * Find the checksum ranges that correspond to the given data ranges.
+ * @param dataRanges the input data ranges, which are assumed to be sorted
+ * and non-overlapping
+ * @return a list of AsyncReaderUtils.CombinedFileRange that correspond to
+ * the checksum ranges
+ */
+ public static List<CombinedFileRange> findChecksumRanges(
+ List<? extends FileRange> dataRanges,
+ int bytesPerSum,
+ int minSeek,
+ int maxSize) {
+ List<CombinedFileRange> result = new ArrayList<>();
+ CombinedFileRange currentCrc = null;
+ for(FileRange range: dataRanges) {
+ long crcOffset = findChecksumOffset(range.getOffset(), bytesPerSum);
+ long crcEnd = findChecksumOffset(range.getOffset() + range.getLength() +
+ bytesPerSum - 1, bytesPerSum);
+ if (currentCrc == null ||
+ !currentCrc.merge(crcOffset, crcEnd, range, minSeek, maxSize)) {
+ currentCrc = new CombinedFileRange(crcOffset, crcEnd, range);
+ result.add(currentCrc);
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Check the data against the checksums.
+ * @param sumsBytes the checksum data
+ * @param sumsOffset where from the checksum file this buffer started
+ * @param data the file data
+ * @param dataOffset where the file data started (must be a multiple of
+ * bytesPerSum)
+ * @param bytesPerSum how many bytes per a checksum
+ * @param file the path of the filename
+ * @return the data buffer
+ * @throws CompletionException if the checksums don't match
+ */
+ static ByteBuffer checkBytes(ByteBuffer sumsBytes,
+ long sumsOffset,
+ ByteBuffer data,
+ long dataOffset,
+ int bytesPerSum,
+ Path file) {
+ // determine how many bytes we need to skip at the start of the sums
+ int offset =
+ (int) (findChecksumOffset(dataOffset, bytesPerSum) - sumsOffset);
+ IntBuffer sums = sumsBytes.asIntBuffer();
+ sums.position(offset / FSInputChecker.CHECKSUM_SIZE);
+ ByteBuffer current = data.duplicate();
+ int numFullChunks = data.remaining() / bytesPerSum;
+ boolean partialChunk = ((data.remaining() % bytesPerSum) != 0);
+ int totalChunks = numFullChunks;
+ if (partialChunk) {
+ totalChunks++;
+ }
+ CRC32 crc = new CRC32();
+ // check each chunk to ensure they match
+ for(int c = 0; c < totalChunks; ++c) {
+ // set the buffer position to the start of every chunk.
+ current.position(c * bytesPerSum);
+
+ if (c == numFullChunks) {
+ // During last chunk, there may be less than chunk size
+ // data preset, so setting the limit accordingly.
+ int lastIncompleteChunk = data.remaining() % bytesPerSum;
+ current.limit((c * bytesPerSum) + lastIncompleteChunk);
+ } else {
+ // set the buffer limit to end of every chunk.
+ current.limit((c + 1) * bytesPerSum);
+ }
+
+ // compute the crc
+ crc.reset();
+ crc.update(current);
+ int expected = sums.get();
+ int calculated = (int) crc.getValue();
+
+ if (calculated != expected) {
+ // cast of c added to silence findbugs
+ long errPosn = dataOffset + (long) c * bytesPerSum;
+ throw new CompletionException(new ChecksumException(
+ "Checksum error: " + file + " at " + errPosn +
+ " exp: " + expected + " got: " + calculated, errPosn));
+ }
+ }
+ // if everything matches, we return the data
+ return data;
+ }
+
+ /**
+ * Validates range parameters.
+ * In case of CheckSum FS, we already have calculated
+ * fileLength so failing fast here.
+ * @param ranges requested ranges.
+ * @param fileLength length of file.
+ * @throws EOFException end of file exception.
+ */
+ private void validateRangeRequest(List<? extends FileRange> ranges,
+ final long fileLength) throws EOFException {
+ for (FileRange range : ranges) {
+ VectoredReadUtils.validateRangeRequest(range);
+ if (range.getOffset() + range.getLength() > fileLength) {
+ final String errMsg = String.format("Requested range [%d, %d) is beyond EOF for path %s",
+ range.getOffset(), range.getLength(), file);
+ LOG.warn(errMsg);
+ throw new EOFException(errMsg);
+ }
+ }
+ }
+
+ @Override
+ public void readVectored(List<? extends FileRange> ranges,
+ IntFunction<ByteBuffer> allocate) throws IOException {
+ final long length = getFileLength();
+ validateRangeRequest(ranges, length);
+
+ // If the stream doesn't have checksums, just delegate.
+ if (sums == null) {
+ datas.readVectored(ranges, allocate);
+ return;
+ }
+ int minSeek = minSeekForVectorReads();
+ int maxSize = maxReadSizeForVectorReads();
+ List<CombinedFileRange> dataRanges =
+ VectoredReadUtils.mergeSortedRanges(Arrays.asList(sortRanges(ranges)), bytesPerSum,
+ minSeek, maxReadSizeForVectorReads());
+ // While merging the ranges above, they are rounded up based on the value of bytesPerSum
+ // which leads to some ranges crossing the EOF thus they need to be fixed else it will
+ // cause EOFException during actual reads.
+ for (CombinedFileRange range : dataRanges) {
+ if (range.getOffset() + range.getLength() > length) {
+ range.setLength((int) (length - range.getOffset()));
+ }
+ }
+ List<CombinedFileRange> checksumRanges = findChecksumRanges(dataRanges,
+ bytesPerSum, minSeek, maxSize);
+ sums.readVectored(checksumRanges, allocate);
+ datas.readVectored(dataRanges, allocate);
+ for(CombinedFileRange checksumRange: checksumRanges) {
+ for(FileRange dataRange: checksumRange.getUnderlying()) {
+ // when we have both the ranges, validate the checksum
+ CompletableFuture<ByteBuffer> result =
+ checksumRange.getData().thenCombineAsync(dataRange.getData(),
+ (sumBuffer, dataBuffer) ->
+ checkBytes(sumBuffer, checksumRange.getOffset(),
+ dataBuffer, dataRange.getOffset(), bytesPerSum, file));
+ // Now, slice the read data range to the user's ranges
+ for(FileRange original: ((CombinedFileRange) dataRange).getUnderlying()) {
+ original.setData(result.thenApply(
+ (b) -> VectoredReadUtils.sliceTo(b, dataRange.getOffset(), original)));
+ }
+ }
+ }
+ }
+
+ @Override
+ public boolean hasCapability(String capability) {
+ return datas.hasCapability(capability);
+ }
}
-
+
private static class FSDataBoundedInputStream extends FSDataInputStream {
private FileSystem fs;
private Path file;
@@ -298,12 +506,12 @@ private static class FSDataBoundedInputStream extends FSDataInputStream {
this.fs = fs;
this.file = file;
}
-
+
@Override
public boolean markSupported() {
return false;
}
-
+
/* Return the file length */
private long getFileLength() throws IOException {
if( fileLen==-1L ) {
@@ -311,7 +519,7 @@ private long getFileLength() throws IOException {
}
return fileLen;
}
-
+
/**
* Skips over and discards n bytes of data from the
* input stream.
@@ -335,11 +543,11 @@ public synchronized long skip(long n) throws IOException {
}
return super.skip(n);
}
-
+
/**
* Seek to the given position in the stream.
* The next read() will be from that position.
- *
+ *
* <p>This method does not allow seek past the end of the file.
* This produces IOException.
*
@@ -362,6 +570,7 @@ public synchronized void seek(long pos) throws IOException {
* Opens an FSDataInputStream at the indicated Path.
* @param f the file name to open
* @param bufferSize the size of the buffer to be used.
+ * @throws IOException if an I/O error occurs.
*/
@Override
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
@@ -404,22 +613,22 @@ public void concat(final Path f, final Path[] psrcs) throws IOException {
*/
public static long getChecksumLength(long size, int bytesPerSum) {
//the checksum length is equal to size passed divided by bytesPerSum +
- //bytes written in the beginning of the checksum file.
- return ((size + bytesPerSum - 1) / bytesPerSum) * 4 +
- CHECKSUM_VERSION.length + 4;
+ //bytes written in the beginning of the checksum file.
+ return ((size + bytesPerSum - 1) / bytesPerSum) * FSInputChecker.CHECKSUM_SIZE +
+ ChecksumFSInputChecker.HEADER_LENGTH;
}
/** This class provides an output stream for a checksummed file.
* It generates checksums for data. */
private static class ChecksumFSOutputSummer extends FSOutputSummer
implements IOStatisticsSource, StreamCapabilities {
- private FSDataOutputStream datas;
+ private FSDataOutputStream datas;
private FSDataOutputStream sums;
private static final float CHKSUM_AS_FRACTION = 0.01f;
private boolean isClosed = false;
-
- public ChecksumFSOutputSummer(ChecksumFileSystem fs,
- Path file,
+
+ ChecksumFSOutputSummer(ChecksumFileSystem fs,
+ Path file,
boolean overwrite,
int bufferSize,
short replication,
@@ -440,7 +649,7 @@ public ChecksumFSOutputSummer(ChecksumFileSystem fs,
sums.write(CHECKSUM_VERSION, 0, CHECKSUM_VERSION.length);
sums.writeInt(bytesPerSum);
}
-
+
@Override
public void close() throws IOException {
try {
@@ -451,7 +660,7 @@ public void close() throws IOException {
isClosed = true;
}
}
-
+
@Override
protected void writeChunk(byte[] b, int offset, int len, byte[] checksum,
int ckoff, int cklen)
@@ -669,7 +878,7 @@ boolean apply(Path p) throws IOException {
* Implement the abstract setReplication of FileSystem
* @param src file name
* @param replication new replication
- * @throws IOException
+ * @throws IOException if an I/O error occurs.
* @return true if successful;
* false if file does not exist or is a directory
*/
@@ -707,7 +916,7 @@ public boolean rename(Path src, Path dst) throws IOException {
value = fs.rename(srcCheckFile, dstCheckFile);
} else if (fs.exists(dstCheckFile)) {
// no src checksum, so remove dst checksum
- value = fs.delete(dstCheckFile, true);
+ value = fs.delete(dstCheckFile, true);
}
return value;
@@ -739,7 +948,7 @@ public boolean delete(Path f, boolean recursive) throws IOException{
return fs.delete(f, true);
}
}
-
+
final private static PathFilter DEFAULT_FILTER = new PathFilter() {
@Override
public boolean accept(Path file) {
@@ -750,11 +959,11 @@ public boolean accept(Path file) {
/**
* List the statuses of the files/directories in the given path if the path is
* a directory.
- *
+ *
* @param f
* given path
* @return the statuses of the files/directories in the given path
- * @throws IOException
+ * @throws IOException if an I/O error occurs.
*/
@Override
public FileStatus[] listStatus(Path f) throws IOException {
public RemoteIterator<FileStatus> listStatusIterator(final Path p)
/**
* List the statuses of the files/directories in the given path if the path is
* a directory.
- *
+ *
* @param f
* given path
* @return the statuses of the files/directories in the given patch
- * @throws IOException
+ * @throws IOException if an I/O error occurs.
*/
@Override
public RemoteIterator<LocatedFileStatus> listLocatedStatus(Path f)
throws IOException {
return fs.listLocatedStatus(f, DEFAULT_FILTER);
}
-
+
@Override
public boolean mkdirs(Path f) throws IOException {
return fs.mkdirs(f);
@@ -811,6 +1020,10 @@ public void copyToLocalFile(boolean delSrc, Path src, Path dst)
* Copy it from FS control to the local dst name.
* If src and dst are directories, the copyCrc parameter
* determines whether to copy CRC files.
+ * @param src src path.
+ * @param dst dst path.
+ * @param copyCrc copy crc flag.
+ * @throws IOException if an I/O error occurs.
*/
@SuppressWarnings("deprecation")
public void copyToLocalFile(Path src, Path dst, boolean copyCrc)
@@ -832,7 +1045,7 @@ public void copyToLocalFile(Path src, Path dst, boolean copyCrc)
} else {
FileStatus[] srcs = listStatus(src);
for (FileStatus srcFile : srcs) {
- copyToLocalFile(srcFile.getPath(),
+ copyToLocalFile(srcFile.getPath(),
new Path(dst, srcFile.getPath().getName()), copyCrc);
}
}
@@ -889,7 +1102,7 @@ protected CompletableFuture<FSDataInputStream> openFileWithOptions(
final OpenFileParameters parameters) throws IOException {
AbstractFSBuilderImpl.rejectUnknownMandatoryKeys(
parameters.getMandatoryKeys(),
- Collections.emptySet(),
+ FS_OPTION_OPENFILE_STANDARD_OPTIONS,
"for " + path);
return LambdaUtils.eval(
new CompletableFuture<>(),
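
With FSInputChecker.CHECKSUM_SIZE = 4 and HEADER_LENGTH = 8, the rewritten getChecksumLength() works out as follows for a 1 MiB file and a typical 512 bytes per checksum:

    long size = 1024 * 1024;      // 1 MiB of data
    int bytesPerSum = 512;
    long chunks = (size + bytesPerSum - 1) / bytesPerSum;    // 2048 CRC32 chunks
    long crcFileLength = chunks * 4 + 8;                      // 8200 bytes of ".crc" data
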
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java
index bc1122c56a2bd..4820c5c3045d7 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ChecksumFs.java
@@ -70,30 +70,53 @@ public void setVerifyChecksum(boolean inVerifyChecksum) {
this.verifyChecksum = inVerifyChecksum;
}
- /** get the raw file system. */
+ /**
+ * get the raw file system.
+ *
+ * @return abstract file system.
+ */
public AbstractFileSystem getRawFs() {
return getMyFs();
}
- /** Return the name of the checksum file associated with a file.*/
+ /**
+ * Return the name of the checksum file associated with a file.
+ *
+ * @param file the file path.
+ * @return the checksum file associated with a file.
+ */
public Path getChecksumFile(Path file) {
return new Path(file.getParent(), "." + file.getName() + ".crc");
}
- /** Return true iff file is a checksum file name.*/
+ /**
+ * Return true iff file is a checksum file name.
+ *
+ * @param file the file path.
+ * @return true if the file is a checksum file, false otherwise.
+ */
public static boolean isChecksumFile(Path file) {
String name = file.getName();
return name.startsWith(".") && name.endsWith(".crc");
}
- /** Return the length of the checksum file given the size of the
+ /**
+ * Return the length of the checksum file given the size of the
* actual file.
- **/
+ *
+ * @param file the file path.
+ * @param fileSize file size.
+ * @return the checksum file length.
+ */
public long getChecksumFileLength(Path file, long fileSize) {
return getChecksumLength(fileSize, getBytesPerSum());
}
- /** Return the bytes Per Checksum. */
+ /**
+ * Return the bytes Per Checksum.
+ *
+ * @return bytes per sum.
+ */
public int getBytesPerSum() {
return defaultBytesPerChecksum;
}
@@ -433,7 +456,7 @@ private boolean isDirectory(Path f)
* Implement the abstract setReplication of FileSystem
* @param src file name
* @param replication new replication
- * @throws IOException
+ * @throws IOException if an I/O error occurs.
* @return true if successful;
* false if file does not exist or is a directory
*/
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
index 6949c67f278d1..9d6224366d1ba 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeys.java
@@ -399,6 +399,12 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
public static final String ZK_ACL_DEFAULT = "world:anyone:rwcda";
/** Authentication for the ZooKeeper ensemble. */
public static final String ZK_AUTH = ZK_PREFIX + "auth";
+ /** Principal name for zookeeper servers. */
+ public static final String ZK_SERVER_PRINCIPAL = ZK_PREFIX + "server.principal";
+ /** Kerberos principal name for zookeeper connection. */
+ public static final String ZK_KERBEROS_PRINCIPAL = ZK_PREFIX + "kerberos.principal";
+ /** Kerberos keytab for zookeeper connection. */
+ public static final String ZK_KERBEROS_KEYTAB = ZK_PREFIX + "kerberos.keytab";
/** Address of the ZooKeeper ensemble. */
public static final String ZK_ADDRESS = ZK_PREFIX + "address";
@@ -469,4 +475,21 @@ public class CommonConfigurationKeys extends CommonConfigurationKeysPublic {
* default hadoop temp dir on local system: {@value}.
*/
public static final String HADOOP_TMP_DIR = "hadoop.tmp.dir";
+
+ /**
+ * Thread-level IOStats Support.
+ * {@value}
+ */
+ public static final String IOSTATISTICS_THREAD_LEVEL_ENABLED =
+ "fs.iostatistics.thread.level.enabled";
+
+ /**
+ * Default value for Thread-level IOStats Support is true.
+ */
+ public static final boolean IOSTATISTICS_THREAD_LEVEL_ENABLED_DEFAULT =
+ true;
+
+ public static final String HADOOP_SECURITY_RESOLVER_IMPL =
+ "hadoop.security.resolver.impl";
+
}
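
The new thread-level IOStatistics switch is an ordinary boolean key and can be toggled like any other configuration option:

    Configuration conf = new Configuration();
    conf.setBoolean(CommonConfigurationKeys.IOSTATISTICS_THREAD_LEVEL_ENABLED, true);
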
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
index a799e883bcf2a..67cd81ee91a96 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonConfigurationKeysPublic.java
@@ -169,11 +169,11 @@ public class CommonConfigurationKeysPublic {
/**
* Number of filesystems instances can be created in parallel.
- *
+ *
* A higher number here does not necessarily improve performance, especially
* for object stores, where multiple threads may be attempting to create an FS
* instance for the same URI.
- *
+ *
* Default value: {@value}.
*/
public static final String FS_CREATION_PARALLEL_COUNT =
@@ -181,8 +181,9 @@ public class CommonConfigurationKeysPublic {
/**
* Default value for {@link #FS_CREATION_PARALLEL_COUNT}.
- *
+ *
* Default value: {@value}.
+ *
*/
public static final int FS_CREATION_PARALLEL_COUNT_DEFAULT =
64;
@@ -999,6 +1000,7 @@ public class CommonConfigurationKeysPublic {
String.join(",",
"secret$",
"password$",
+ "username$",
"ssl.keystore.pass$",
"fs.s3.*[Ss]ecret.?[Kk]ey",
"fs.s3a.*.server-side-encryption.key",
@@ -1053,5 +1055,13 @@ public class CommonConfigurationKeysPublic {
public static final String HADOOP_HTTP_IDLE_TIMEOUT_MS_KEY =
"hadoop.http.idle_timeout.ms";
public static final int HADOOP_HTTP_IDLE_TIMEOUT_MS_DEFAULT = 60000;
+
+ /**
+ * To configure scheduling of server metrics update thread. This config is used to indicate
+ * initial delay and delay between each execution of the metric update runnable thread.
+ */
+ public static final String IPC_SERVER_METRICS_UPDATE_RUNNER_INTERVAL =
+ "ipc.server.metrics.update.runner.interval";
+ public static final int IPC_SERVER_METRICS_UPDATE_RUNNER_INTERVAL_DEFAULT = 5000;
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java
index df932df43aebd..aa231554eb0cb 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CommonPathCapabilities.java
@@ -146,4 +146,22 @@ private CommonPathCapabilities() {
*/
public static final String ABORTABLE_STREAM =
"fs.capability.outputstream.abortable";
+
+ /**
+ * Does this FS support etags?
+ * That is: will FileStatus entries from listing/getFileStatus
+ * probes support EtagSource and return real values.
+ */
+ public static final String ETAGS_AVAILABLE =
+ "fs.capability.etags.available";
+
+ /**
+ * Are etags guaranteed to be preserved across rename() operations?
+ * FileSystems MUST NOT declare support for this feature
+ * unless this holds.
+ */
+ public static final String ETAGS_PRESERVED_IN_RENAME =
+ "fs.capability.etags.preserved.in.rename";
+
+
}
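
Callers are expected to probe these capabilities before relying on etags surviving a rename; a minimal check, assuming a FileSystem "fs" and Path "path" are in scope:

    boolean etagsStable =
        fs.hasPathCapability(path, CommonPathCapabilities.ETAGS_PRESERVED_IN_RENAME);
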
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CompositeCrcFileChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CompositeCrcFileChecksum.java
index e1ed5cbcfcaa6..bdbc8f3a33f4b 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CompositeCrcFileChecksum.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CompositeCrcFileChecksum.java
@@ -37,7 +37,13 @@ public class CompositeCrcFileChecksum extends FileChecksum {
private DataChecksum.Type crcType;
private int bytesPerCrc;
- /** Create a CompositeCrcFileChecksum. */
+ /**
+ * Create a CompositeCrcFileChecksum.
+ *
+ * @param crc crc.
+ * @param crcType crcType.
+ * @param bytesPerCrc bytesPerCrc.
+ */
public CompositeCrcFileChecksum(
int crc, DataChecksum.Type crcType, int bytesPerCrc) {
this.crc = crc;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ContentSummary.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ContentSummary.java
index 79850e1a2f291..9f97a12fa6088 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ContentSummary.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/ContentSummary.java
@@ -149,17 +149,31 @@ public ContentSummary build() {
@Deprecated
public ContentSummary() {}
- /** Constructor, deprecated by ContentSummary.Builder
+ /**
+ * Constructor, deprecated by ContentSummary.Builder
* This constructor implicitly set spaceConsumed the same as length.
* spaceConsumed and length must be set explicitly with
- * ContentSummary.Builder
+ * ContentSummary.Builder.
+ *
+ * @param length length.
+ * @param fileCount file count.
+ * @param directoryCount directory count.
* */
@Deprecated
public ContentSummary(long length, long fileCount, long directoryCount) {
this(length, fileCount, directoryCount, -1L, length, -1L);
}
- /** Constructor, deprecated by ContentSummary.Builder */
+ /**
+ * Constructor, deprecated by ContentSummary.Builder.
+ *
+ * @param length length.
+ * @param fileCount file count.
+ * @param directoryCount directory count.
+ * @param quota quota.
+ * @param spaceConsumed space consumed.
+ * @param spaceQuota space quota.
+ * */
@Deprecated
public ContentSummary(
long length, long fileCount, long directoryCount, long quota,
@@ -172,7 +186,11 @@ public ContentSummary(
setSpaceQuota(spaceQuota);
}
- /** Constructor for ContentSummary.Builder*/
+ /**
+ * Constructor for ContentSummary.Builder.
+ *
+ * @param builder builder.
+ */
private ContentSummary(Builder builder) {
super(builder);
this.length = builder.length;
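
The deprecated constructors above are superseded by the fluent builder; a small sketch using the setters on ContentSummary.Builder:

    ContentSummary summary = new ContentSummary.Builder()
        .length(1024)
        .fileCount(1)
        .directoryCount(0)
        .spaceConsumed(1024)
        .build();
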
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CreateFlag.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CreateFlag.java
index 71993713ad2eb..ca008e536931d 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CreateFlag.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/CreateFlag.java
@@ -189,6 +189,8 @@ public static void validate(Object path, boolean pathExists,
/**
* Validate the CreateFlag for the append operation. The flag must contain
* APPEND, and cannot contain OVERWRITE.
+ *
+ * @param flag enum set flag.
*/
public static void validateForAppend(EnumSet<CreateFlag> flag) {
validate(flag);
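
For example, a flag set built for append passes validation, while one that also asks to overwrite is rejected:

    CreateFlag.validateForAppend(EnumSet.of(CreateFlag.APPEND));   // accepted
    // CreateFlag.validateForAppend(EnumSet.of(CreateFlag.APPEND, CreateFlag.OVERWRITE));  // throws
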
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java
index da4636b2c0fbe..c5a052f3de4be 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DF.java
@@ -65,7 +65,10 @@ public String getDirPath() {
return dirPath;
}
- /** @return a string indicating which filesystem volume we're checking. */
+ /**
+ * @return a string indicating which filesystem volume we're checking.
+ * @throws IOException raised on errors performing I/O.
+ */
public String getFilesystem() throws IOException {
if (Shell.WINDOWS) {
this.filesystem = dirFile.getCanonicalPath().substring(0, 2);
@@ -100,7 +103,10 @@ public int getPercentUsed() {
return (int) (used * 100.0 / cap);
}
- /** @return the filesystem mount point for the indicated volume */
+ /**
+ * @return the filesystem mount point for the indicated volume.
+ * @throws IOException raised on errors performing I/O.
+ */
public String getMount() throws IOException {
// Abort early if specified path does not exist
if (!dirFile.exists()) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java
index 33905dcbb77fd..794855508c63f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/DelegationTokenRenewer.java
@@ -47,7 +47,11 @@ public interface Renewable {
/** @return the renew token. */
public Token<?> getRenewToken();
- /** Set delegation token. */
+ /**
+ * Set delegation token.
+ * @param <T> generic type T.
+ * @param token token.
+ */
public <T extends TokenIdentifier> void setDelegationToken(Token<T> token);
}
@@ -172,7 +176,11 @@ public String toString() {
/** Queue to maintain the RenewActions to be processed by the {@link #run()} */
private volatile DelayQueue<RenewAction<?>> queue = new DelayQueue<RenewAction<?>>();
- /** For testing purposes */
+ /**
+ * For testing purposes.
+ *
+ * @return renew queue length.
+ */
@VisibleForTesting
protected int getRenewQueueLength() {
return queue.size();
@@ -211,7 +219,13 @@ static synchronized void reset() {
}
}
- /** Add a renew action to the queue. */
+ /**
+ * Add a renew action to the queue.
+ *
+ * @param <T> generic type T.
+ * @param fs file system.
+ * @return renew action.
+ */
@SuppressWarnings("static-access")
public <T extends FileSystem & Renewable> RenewAction<T> addRenewAction(final T fs) {
synchronized (this) {
@@ -230,8 +244,10 @@ public RenewAction addRenewAction(final T
/**
* Remove the associated renew action from the queue
- *
- * @throws IOException
+ *
+ * @param <T> generic type T.
+ * @param fs file system.
+ * @throws IOException raised on errors performing I/O.
*/
public void removeRenewAction(
final T fs) throws IOException {
@@ -240,9 +256,8 @@ public void removeRenewAction(
try {
action.cancel();
} catch (InterruptedException ie) {
- LOG.error("Interrupted while canceling token for " + fs.getUri()
- + "filesystem");
- LOG.debug("Exception in removeRenewAction: {}", ie);
+ LOG.error("Interrupted while canceling token for {} filesystem.", fs.getUri());
+ LOG.debug("Exception in removeRenewAction.", ie);
}
}
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/EtagSource.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/EtagSource.java
new file mode 100644
index 0000000000000..d7efdc705d8e5
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/EtagSource.java
@@ -0,0 +1,38 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+/**
+ * An optional interface for {@link FileStatus} subclasses to implement
+ * to provide access to etags.
+ * If available FS SHOULD also implement the matching PathCapabilities
+ * -- etag supported: {@link CommonPathCapabilities#ETAGS_AVAILABLE}.
+ * -- etag consistent over rename:
+ * {@link CommonPathCapabilities#ETAGS_PRESERVED_IN_RENAME}.
+ */
+public interface EtagSource {
+
+ /**
+ * Return an etag of this file status.
+ * A return value of null or "" means "no etag"
+ * @return a possibly null or empty etag.
+ */
+ String getEtag();
+
+}
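To make the new interface concrete, here is a hedged usage sketch (not part of the patch; the surrounding class and path handling are illustrative, and it assumes the CommonPathCapabilities.ETAGS_AVAILABLE constant referenced in the javadoc above). The client probes the path capability before downcasting the FileStatus:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.CommonPathCapabilities;
import org.apache.hadoop.fs.EtagSource;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class EtagProbe {
  public static String etagOf(Path file, Configuration conf) throws java.io.IOException {
    FileSystem fs = file.getFileSystem(conf);
    // Only stores declaring ETAGS_AVAILABLE are expected to return EtagSource statuses.
    if (fs.hasPathCapability(file, CommonPathCapabilities.ETAGS_AVAILABLE)) {
      FileStatus st = fs.getFileStatus(file);
      if (st instanceof EtagSource) {
        return ((EtagSource) st).getEtag();  // may be null or "" meaning "no etag"
      }
    }
    return null;
  }
}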
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSBuilder.java
index b7757a62e28ad..56ef51f128db8 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSBuilder.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSBuilder.java
@@ -37,12 +37,17 @@ public interface FSBuilder> {
/**
* Set optional Builder parameter.
+ * @param key key.
+ * @param value value.
+ * @return generic type B.
*/
B opt(@Nonnull String key, @Nonnull String value);
/**
* Set optional boolean parameter for the Builder.
- *
+ * @param key key.
+ * @param value value.
+ * @return generic type B.
* @see #opt(String, String)
*/
B opt(@Nonnull String key, boolean value);
@@ -50,6 +55,9 @@ public interface FSBuilder> {
/**
* Set optional int parameter for the Builder.
*
+ * @param key key.
+ * @param value value.
+ * @return generic type B.
* @see #opt(String, String)
*/
B opt(@Nonnull String key, int value);
@@ -57,13 +65,29 @@ public interface FSBuilder> {
/**
* Set optional float parameter for the Builder.
*
+ * @param key key.
+ * @param value value.
+ * @return generic type B.
* @see #opt(String, String)
*/
B opt(@Nonnull String key, float value);
+ /**
+ * Set optional long parameter for the Builder.
+ *
+ * @param key key.
+ * @param value value.
+ * @return generic type B.
+ * @see #opt(String, String)
+ */
+ B opt(@Nonnull String key, long value);
+
/**
* Set optional double parameter for the Builder.
*
+ * @param key key.
+ * @param value value.
+ * @return generic type B.
* @see #opt(String, String)
*/
B opt(@Nonnull String key, double value);
@@ -71,6 +95,9 @@ public interface FSBuilder> {
/**
* Set an array of string values as optional parameter for the Builder.
*
+ * @param key key.
+ * @param values values.
+ * @return generic type B.
* @see #opt(String, String)
*/
B opt(@Nonnull String key, @Nonnull String... values);
@@ -80,12 +107,19 @@ public interface FSBuilder> {
*
* If the option is not supported or unavailable,
* the client should expect {@link #build()} throws IllegalArgumentException.
+ *
+ * @param key key.
+ * @param value value.
+ * @return generic type B.
*/
B must(@Nonnull String key, @Nonnull String value);
/**
* Set mandatory boolean option.
*
+ * @param key key.
+ * @param value value.
+ * @return generic type B.
* @see #must(String, String)
*/
B must(@Nonnull String key, boolean value);
@@ -93,6 +127,9 @@ public interface FSBuilder> {
/**
* Set mandatory int option.
*
+ * @param key key.
+ * @param value value.
+ * @return generic type B.
* @see #must(String, String)
*/
B must(@Nonnull String key, int value);
@@ -100,13 +137,29 @@ public interface FSBuilder> {
/**
* Set mandatory float option.
*
+ * @param key key.
+ * @param value value.
+ * @return generic type B.
* @see #must(String, String)
*/
B must(@Nonnull String key, float value);
+ /**
+ * Set mandatory long option.
+ *
+ * @param key key.
+ * @param value value.
+ * @return generic type B.
+ * @see #must(String, String)
+ */
+ B must(@Nonnull String key, long value);
+
/**
* Set mandatory double option.
*
+ * @param key key.
+ * @param value value.
+ * @return generic type B.
* @see #must(String, String)
*/
B must(@Nonnull String key, double value);
@@ -114,6 +167,9 @@ public interface FSBuilder> {
/**
* Set a string array as mandatory option.
*
+ * @param key key.
+ * @param values values.
+ * @return generic type B.
* @see #must(String, String)
*/
B must(@Nonnull String key, @Nonnull String... values);
@@ -125,6 +181,7 @@ public interface FSBuilder> {
* @throws UnsupportedOperationException if the filesystem does not support
* the specific operation.
* @throws IOException on filesystem IO errors.
+ * @return generic type S.
*/
S build() throws IllegalArgumentException,
UnsupportedOperationException, IOException;
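As a hedged illustration of why the long-valued opt()/must() overloads are being added (this example is not part of the patch; it assumes the openfile option constants and the FutureIO helper used elsewhere in this change), a caller can pass a file length hint, which is a long, through the openFile builder:

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH;
import static org.apache.hadoop.util.functional.FutureIO.awaitFuture;

public class OpenWithLengthHint {
  public static FSDataInputStream open(FileSystem fs, Path path) throws java.io.IOException {
    FileStatus st = fs.getFileStatus(path);
    // FutureDataInputStreamBuilder is an FSBuilder, so this length hint
    // resolves to the new opt(String, long) overload.
    return awaitFuture(fs.openFile(path)
        .opt(FS_OPTION_OPENFILE_LENGTH, st.getLen())
        .build());
  }
}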
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java
index b143a4cb63d19..cca6c28da11a3 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataInputStream.java
@@ -1,4 +1,4 @@
-/**
+/*
*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
@@ -26,6 +26,8 @@
import java.io.InputStream;
import java.nio.ByteBuffer;
import java.util.EnumSet;
+import java.util.List;
+import java.util.function.IntFunction;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@@ -51,7 +53,7 @@ public class FSDataInputStream extends DataInputStream
*/
private final IdentityHashStore<ByteBuffer, ByteBufferPool>
extendedReadBuffers
- = new IdentityHashStore<ByteBuffer, ByteBufferPool>(0);
+ = new IdentityHashStore<>(0);
public FSDataInputStream(InputStream in) {
super(in);
@@ -142,7 +144,8 @@ public boolean seekToNewSource(long targetPos) throws IOException {
*
* @return the underlying input stream
*/
- @InterfaceAudience.LimitedPrivate({"HDFS"})
+ @InterfaceAudience.Public
+ @InterfaceStability.Stable
public InputStream getWrappedStream() {
return in;
}
@@ -279,4 +282,20 @@ public void readFully(long position, ByteBuffer buf) throws IOException {
public IOStatistics getIOStatistics() {
return IOStatisticsSupport.retrieveIOStatistics(in);
}
+
+ @Override
+ public int minSeekForVectorReads() {
+ return ((PositionedReadable) in).minSeekForVectorReads();
+ }
+
+ @Override
+ public int maxReadSizeForVectorReads() {
+ return ((PositionedReadable) in).maxReadSizeForVectorReads();
+ }
+
+ @Override
+ public void readVectored(List<? extends FileRange> ranges,
+ IntFunction<ByteBuffer> allocate) throws IOException {
+ ((PositionedReadable) in).readVectored(ranges, allocate);
+ }
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java
index c96d499d17ba6..16938a83a69c7 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSDataOutputStreamBuilder.java
@@ -123,6 +123,9 @@ public abstract class FSDataOutputStreamBuilder
/**
* Constructor.
+ *
+ * @param fileSystem file system.
+ * @param p the path.
*/
protected FSDataOutputStreamBuilder(@Nonnull FileSystem fileSystem,
@Nonnull Path p) {
@@ -149,6 +152,9 @@ protected FsPermission getPermission() {
/**
* Set permission for the file.
+ *
+ * @param perm permission.
+ * @return B Generics Type.
*/
public B permission(@Nonnull final FsPermission perm) {
checkNotNull(perm);
@@ -162,6 +168,9 @@ protected int getBufferSize() {
/**
* Set the size of the buffer to be used.
+ *
+ * @param bufSize buffer size.
+ * @return Generics Type B.
*/
public B bufferSize(int bufSize) {
bufferSize = bufSize;
@@ -174,6 +183,9 @@ protected short getReplication() {
/**
* Set replication factor.
+ *
+ * @param replica replica.
+ * @return Generics Type B.
*/
public B replication(short replica) {
replication = replica;
@@ -186,6 +198,9 @@ protected long getBlockSize() {
/**
* Set block size.
+ *
+ * @param blkSize block size.
+ * @return B Generics Type.
*/
public B blockSize(long blkSize) {
blockSize = blkSize;
@@ -194,6 +209,8 @@ public B blockSize(long blkSize) {
/**
* Return true to create the parent directories if they do not exist.
+ *
+ * @return true if missing parent directories should be created, false otherwise.
*/
protected boolean isRecursive() {
return recursive;
@@ -201,6 +218,8 @@ protected boolean isRecursive() {
/**
* Create the parent directory if they do not exist.
+ *
+ * @return B Generics Type.
*/
public B recursive() {
recursive = true;
@@ -213,6 +232,9 @@ protected Progressable getProgress() {
/**
* Set the facility of reporting progress.
+ *
+ * @param prog progress.
+ * @return B Generics Type.
*/
public B progress(@Nonnull final Progressable prog) {
checkNotNull(prog);
@@ -226,6 +248,8 @@ protected EnumSet getFlags() {
/**
* Create an FSDataOutputStream at the specified path.
+ *
+ * @return Generics Type B.
*/
public B create() {
flags.add(CreateFlag.CREATE);
@@ -236,6 +260,9 @@ public B create() {
* Set to true to overwrite the existing file.
* Set it to false, an exception will be thrown when calling {@link #build()}
* if the file exists.
+ *
+ * @param overwrite overwrite flag.
+ * @return Generics Type B.
*/
public B overwrite(boolean overwrite) {
if (overwrite) {
@@ -248,6 +275,8 @@ public B overwrite(boolean overwrite) {
/**
* Append to an existing file (optional operation).
+ *
+ * @return Generics Type B.
*/
public B append() {
flags.add(CreateFlag.APPEND);
@@ -260,6 +289,9 @@ protected ChecksumOpt getChecksumOpt() {
/**
* Set checksum opt.
+ *
+ * @param chksumOpt checksum option.
+ * @return Generics Type B.
*/
public B checksumOpt(@Nonnull final ChecksumOpt chksumOpt) {
checkNotNull(chksumOpt);
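For readers who have not used this builder, a hedged sketch of typical use follows (not from the patch; the path, permission and sizes are illustrative). FileSystem#createFile returns an FSDataOutputStreamBuilder, so the setters documented above chain fluently:

import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.fs.permission.FsPermission;

public class CreateWithBuilder {
  public static void write(FileSystem fs, Path path, byte[] data) throws java.io.IOException {
    try (FSDataOutputStream out = fs.createFile(path)
        .permission(FsPermission.getFileDefault())
        .bufferSize(8 * 1024)
        .replication((short) 2)
        .recursive()        // create missing parent directories
        .overwrite(true)
        .build()) {
      out.write(data);
    }
  }
}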
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSInputChecker.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSInputChecker.java
index de66eab713ab6..ee16ca8a2cd50 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSInputChecker.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSInputChecker.java
@@ -82,6 +82,7 @@ protected FSInputChecker( Path file, int numOfRetries) {
* @param sum the type of Checksum engine
* @param chunkSize maximun chunk size
* @param checksumSize the number byte of each checksum
+ * @param verifyChecksum whether to verify the checksum.
*/
protected FSInputChecker( Path file, int numOfRetries,
boolean verifyChecksum, Checksum sum, int chunkSize, int checksumSize ) {
@@ -118,6 +119,7 @@ protected FSInputChecker( Path file, int numOfRetries,
* @param len maximum number of bytes to read
* @param checksum the data buffer into which to write checksums
* @return number of bytes read
+ * @throws IOException raised on errors performing I/O.
*/
abstract protected int readChunk(long pos, byte[] buf, int offset, int len,
byte[] checksum) throws IOException;
@@ -129,7 +131,10 @@ abstract protected int readChunk(long pos, byte[] buf, int offset, int len,
*/
abstract protected long getChunkPosition(long pos);
- /** Return true if there is a need for checksum verification */
+ /**
+ * Return true if there is a need for checksum verification.
+ * @return true if checksum verification is needed, false otherwise.
+ */
protected synchronized boolean needChecksum() {
return verifyChecksum && sum != null;
}
@@ -357,6 +362,9 @@ private void verifySums(final byte b[], final int off, int read)
* Convert a checksum byte array to a long
* This is deprecated since 0.22 since it is no longer in use
* by this class.
+ *
+ * @param checksum the checksum byte array.
+ * @return crc.
*/
@Deprecated
static public long checksum2long(byte[] checksum) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java
index ffe4b34ca5fdb..f85cf7a858152 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSLinkResolver.java
@@ -74,7 +74,7 @@ abstract public T next(final AbstractFileSystem fs, final Path p)
* @param fc FileContext used to access file systems.
* @param path The path to resolve symlinks on.
* @return Generic type determined by the implementation of next.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public T resolve(final FileContext fc, final Path path) throws IOException {
int count = 0;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java
index 6de026b9d17c0..4ef512dc257a3 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FSOutputSummer.java
@@ -186,6 +186,8 @@ public void flush() throws IOException {
/**
* Return the number of valid bytes currently in the buffer.
+ *
+ * @return buffer data size.
*/
protected synchronized int getBufferedDataSize() {
return count;
@@ -227,6 +229,10 @@ private void writeChecksumChunks(byte b[], int off, int len)
/**
* Converts a checksum integer value to a byte stream
+ *
+ * @param sum the checksum.
+ * @param checksumSize checksum size.
+ * @return byte stream.
*/
static public byte[] convertToByteStream(Checksum sum, int checksumSize) {
return int2byte((int)sum.getValue(), new byte[checksumSize]);
@@ -245,6 +251,8 @@ static byte[] int2byte(int integer, byte[] bytes) {
/**
* Resets existing buffer with a new one of the specified size.
+ *
+ * @param size size.
*/
protected synchronized void setChecksumBufSize(int size) {
this.buf = new byte[size];
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileChecksum.java
index 6822fa485622f..62d2e3af78671 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileChecksum.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileChecksum.java
@@ -28,20 +28,37 @@
@InterfaceAudience.Public
@InterfaceStability.Stable
public abstract class FileChecksum implements Writable {
- /** The checksum algorithm name */
+ /**
+ * The checksum algorithm name.
+ *
+ * @return algorithm name.
+ */
public abstract String getAlgorithmName();
- /** The length of the checksum in bytes */
+ /**
+ * The length of the checksum in bytes.
+ *
+ * @return length.
+ */
public abstract int getLength();
- /** The value of the checksum in bytes */
+ /**
+ * The value of the checksum in bytes.
+ *
+ * @return byte array.
+ */
public abstract byte[] getBytes();
public ChecksumOpt getChecksumOpt() {
return null;
}
- /** Return true if both the algorithms and the values are the same. */
+ /**
+ * Return true if both the algorithms and the values are the same.
+ *
+ * @param other the object to compare against.
+ * @return true if both the algorithms and the values are equal, false otherwise.
+ */
@Override
public boolean equals(Object other) {
if (other == this) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java
index 9922dfa0ac8b8..22ac2ecbd7949 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileContext.java
@@ -70,7 +70,12 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_BUFFER_SIZE;
+import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY;
+import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH;
+import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE;
import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
+import static org.apache.hadoop.util.functional.FutureIO.awaitFuture;
/**
* The FileContext class provides an interface for users of the Hadoop
@@ -361,8 +366,8 @@ public AbstractFileSystem run() throws UnsupportedFileSystemException {
* Create a FileContext with specified FS as default using the specified
* config.
*
- * @param defFS
- * @param aConf
+ * @param defFS default fs.
+ * @param aConf configuration.
* @return new FileContext with specified FS as default.
*/
public static FileContext getFileContext(final AbstractFileSystem defFS,
@@ -373,7 +378,7 @@ public static FileContext getFileContext(final AbstractFileSystem defFS,
/**
* Create a FileContext for specified file system using the default config.
*
- * @param defaultFS
+ * @param defaultFS default fs.
* @return a FileContext with the specified AbstractFileSystem
* as the default FS.
*/
@@ -406,6 +411,7 @@ protected static FileContext getFileContext(
*
* @throws UnsupportedFileSystemException If the file system from the default
* configuration is not supported
+ * @return file context.
*/
public static FileContext getFileContext()
throws UnsupportedFileSystemException {
@@ -425,7 +431,7 @@ public static FileContext getLocalFSFileContext()
/**
* Create a FileContext for specified URI using the default config.
*
- * @param defaultFsUri
+ * @param defaultFsUri the default FS URI.
* @return a FileContext with the specified URI as the default FS.
*
* @throws UnsupportedFileSystemException If the file system for
@@ -439,8 +445,8 @@ public static FileContext getFileContext(final URI defaultFsUri)
/**
* Create a FileContext for specified default URI using the specified config.
*
- * @param defaultFsUri
- * @param aConf
+ * @param defaultFsUri the default FS URI.
+ * @param aConf configuration.
* @return new FileContext for specified uri
* @throws UnsupportedFileSystemException If the file system with specified is
* not supported
@@ -471,7 +477,7 @@ public static FileContext getFileContext(final URI defaultFsUri,
* {@link #getFileContext(URI, Configuration)} instead of this one.
*
*
- * @param aConf
+ * @param aConf configuration.
* @return new FileContext
* @throws UnsupportedFileSystemException If file system in the config
* is not supported
@@ -549,6 +555,7 @@ public void setWorkingDirectory(final Path newWDir) throws IOException {
/**
* Gets the working directory for wd-relative names (such a "foo/bar").
+ * @return the working directory path.
*/
public Path getWorkingDirectory() {
return workingDir;
@@ -595,13 +602,14 @@ public void setUMask(final FsPermission newUmask) {
* @throws FileNotFoundException If f does not exist
* @throws AccessControlException if access denied
* @throws IOException If an IO Error occurred
- *
+ * @throws UnresolvedLinkException If unresolved link occurred.
+ *
* Exceptions applicable to file systems accessed over RPC:
* @throws RpcClientException If an exception occurred in the RPC client
* @throws RpcServerException If an exception occurred in the RPC server
* @throws UnexpectedServerException If server implementation throws
* undeclared exception to RPC server
- *
+ *
* RuntimeExceptions:
* @throws InvalidPathException If path f is not valid
*/
@@ -615,7 +623,7 @@ public Path resolvePath(final Path f) throws FileNotFoundException,
* A Fully-qualified path has scheme and authority specified and an absolute
* path.
* Use the default file system and working dir in this FileContext to qualify.
- * @param path
+ * @param path the path.
* @return qualified path
*/
public Path makeQualified(final Path path) {
@@ -754,6 +762,7 @@ public FSDataOutputStream build() throws IOException {
*
* Client should expect {@link FSDataOutputStreamBuilder#build()} throw the
* same exceptions as create(Path, EnumSet, CreateOpts...).
+ * @throws IOException If an I/O error occurred.
*/
public FSDataOutputStreamBuilder create(final Path f)
throws IOException {
@@ -827,6 +836,8 @@ public Void next(final AbstractFileSystem fs, final Path p)
*
* RuntimeExceptions:
* @throws InvalidPathException If path f is invalid
+ *
+ * @return true if the delete succeeded, false otherwise.
*/
public boolean delete(final Path f, final boolean recursive)
throws AccessControlException, FileNotFoundException,
@@ -857,6 +868,7 @@ public Boolean next(final AbstractFileSystem fs, final Path p)
* @throws RpcServerException If an exception occurred in the RPC server
* @throws UnexpectedServerException If server implementation throws
* undeclared exception to RPC server
+ * @return input stream.
*/
public FSDataInputStream open(final Path f) throws AccessControlException,
FileNotFoundException, UnsupportedFileSystemException, IOException {
@@ -887,6 +899,7 @@ public FSDataInputStream next(final AbstractFileSystem fs, final Path p)
* @throws RpcServerException If an exception occurred in the RPC server
* @throws UnexpectedServerException If server implementation throws
* undeclared exception to RPC server
+ * @return input stream.
*/
public FSDataInputStream open(final Path f, final int bufferSize)
throws AccessControlException, FileNotFoundException,
@@ -996,6 +1009,7 @@ public Boolean next(final AbstractFileSystem fs, final Path p)
*
* @param src path to be renamed
* @param dst new path after rename
+ * @param options rename options.
*
* @throws AccessControlException If access is denied
* @throws FileAlreadyExistsException If dst already exists and
@@ -1047,7 +1061,7 @@ public Void next(final AbstractFileSystem fs, final Path p)
/**
* Set permission of a path.
- * @param f
+ * @param f the path.
* @param permission - the new absolute permission (umask is not applied)
*
* @throws AccessControlException If access is denied
@@ -1191,7 +1205,7 @@ public FileChecksum next(final AbstractFileSystem fs, final Path p)
* Set the verify checksum flag for the file system denoted by the path.
* This is only applicable if the
* corresponding FileSystem supports checksum. By default doesn't do anything.
- * @param verifyChecksum
+ * @param verifyChecksum whether to verify the checksum.
* @param f set the verifyChecksum for the Filesystem containing this path
*
* @throws AccessControlException If access is denied
@@ -1246,8 +1260,9 @@ public FileStatus next(final AbstractFileSystem fs, final Path p)
/**
* Synchronize client metadata state.
*
- * @throws IOException
- * @throws UnsupportedOperationException
+ * @throws IOException If an I/O error occurred.
+ * @throws UnsupportedOperationException If file system for f is
+ * not supported.
*/
public void msync() throws IOException, UnsupportedOperationException {
defaultFS.msync();
@@ -1608,9 +1623,12 @@ public RemoteIterator next(
}
/**
+ * List corrupt file blocks.
+ *
+ * @param path the path.
* @return an iterator over the corrupt files under the given path
* (may contain duplicates if a file has more than one corrupt block)
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
public RemoteIterator<Path> listCorruptFileBlocks(Path path)
throws IOException {
@@ -1734,6 +1752,7 @@ public class Util {
* @throws RpcServerException If an exception occurred in the RPC server
* @throws UnexpectedServerException If server implementation throws
* undeclared exception to RPC server
+ * @return true if f exists, false otherwise.
*/
public boolean exists(final Path f) throws AccessControlException,
UnsupportedFileSystemException, IOException {
@@ -1794,6 +1813,12 @@ public ContentSummary getContentSummary(Path f)
/**
* See {@link #listStatus(Path[], PathFilter)}
+ *
+ * @param files files.
+ * @throws AccessControlException If access is denied.
+ * @throws FileNotFoundException If files does not exist.
+ * @throws IOException If an I/O error occurred.
+ * @return file status array.
*/
public FileStatus[] listStatus(Path[] files) throws AccessControlException,
FileNotFoundException, IOException {
@@ -2049,36 +2074,29 @@ public LocatedFileStatus next() throws IOException {
*
?
*
Matches any single character.
*
- *
*
*
*
Matches zero or more characters.
*
- *
*
[abc]
*
Matches a single character from character set
* {a,b,c}.
*
- *
*
[a-b]
*
Matches a single character from the character range
* {a...b}. Note: character a must be
* lexicographically less than or equal to character b.
*
- *
*
[^a]
*
Matches a single char that is not from character set or range
* {a}. Note that the ^ character must occur
* immediately to the right of the opening bracket.
*
- *
*
\c
*
Removes (escapes) any special meaning of character c.
*
- *
*
{ab,cd}
*
Matches a string from the string set {ab, cd}
- *
- *
+ *
*
{ab,c{de,fh}}
*
Matches a string from string set {ab, cde, cfh}
*
@@ -2139,6 +2157,18 @@ public FileStatus[] globStatus(final Path pathPattern,
/**
* Copy file from src to dest. See
* {@link #copy(Path, Path, boolean, boolean)}
+ *
+ * @param src src.
+ * @param dst dst.
+ * @throws AccessControlException If access is denied.
+ * @throws FileAlreadyExistsException If file src already exists.
+ * @throws FileNotFoundException if next file does not exist any more.
+ * @throws ParentNotDirectoryException If parent of src is not a
+ * directory.
+ * @throws UnsupportedFileSystemException If file system for
+ * src/dst is not supported.
+ * @throws IOException If an I/O error occurred.
+ * @return true if the copy succeeded, false otherwise.
*/
public boolean copy(final Path src, final Path dst)
throws AccessControlException, FileAlreadyExistsException,
@@ -2149,8 +2179,8 @@ public boolean copy(final Path src, final Path dst)
/**
* Copy from src to dst, optionally deleting src and overwriting dst.
- * @param src
- * @param dst
+ * @param src src.
+ * @param dst dst.
* @param deleteSource - delete src if true
* @param overwrite overwrite dst if true; throw IOException if dst exists
* and overwrite is false.
@@ -2198,7 +2228,12 @@ public boolean copy(final Path src, final Path dst, boolean deleteSource,
EnumSet<CreateFlag> createFlag = overwrite ? EnumSet.of(
CreateFlag.CREATE, CreateFlag.OVERWRITE) :
EnumSet.of(CreateFlag.CREATE);
- InputStream in = open(qSrc);
+ InputStream in = awaitFuture(openFile(qSrc)
+ .opt(FS_OPTION_OPENFILE_READ_POLICY,
+ FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE)
+ .opt(FS_OPTION_OPENFILE_LENGTH,
+ fs.getLen()) // file length hint for object stores
+ .build());
try (OutputStream out = create(qDst, createFlag)) {
IOUtils.copyBytes(in, out, conf, true);
} finally {
@@ -2266,7 +2301,7 @@ private static void checkDependencies(Path qualSrc, Path qualDst)
* Are qualSrc and qualDst of the same file system?
* @param qualPath1 - fully qualified path
* @param qualPath2 - fully qualified path
- * @return
+ * @return true if both paths are on the same filesystem, false otherwise.
*/
private static boolean isSameFS(Path qualPath1, Path qualPath2) {
URI srcUri = qualPath1.toUri();
@@ -2289,6 +2324,13 @@ public synchronized void run() {
/**
* Resolves all symbolic links in the specified path.
* Returns the new path object.
+ *
+ * @param f the path.
+ * @throws FileNotFoundException If f does not exist.
+ * @throws UnresolvedLinkException If unresolved link occurred.
+ * @throws AccessControlException If access is denied.
+ * @throws IOException If an I/O error occurred.
+ * @return the resolved path.
*/
protected Path resolve(final Path f) throws FileNotFoundException,
UnresolvedLinkException, AccessControlException, IOException {
@@ -2306,6 +2348,7 @@ public Path next(final AbstractFileSystem fs, final Path p)
* to, but not including the final path component.
* @param f path to resolve
* @return the new path object.
+ * @throws IOException If an I/O error occurred.
*/
protected Path resolveIntermediate(final Path f) throws IOException {
return new FSLinkResolver<FileStatus>() {
@@ -2324,13 +2367,12 @@ public FileStatus next(final AbstractFileSystem fs, final Path p)
* @param f
* Path which needs to be resolved
* @return List of AbstractFileSystems accessed in the path
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
Set<AbstractFileSystem> resolveAbstractFileSystems(final Path f)
throws IOException {
final Path absF = fixRelativePart(f);
- final HashSet<AbstractFileSystem> result
- = new HashSet<AbstractFileSystem>();
+ final HashSet<AbstractFileSystem> result = new HashSet<>();
new FSLinkResolver<Void>() {
@Override
public Void next(final AbstractFileSystem fs, final Path p)
@@ -2385,7 +2427,7 @@ public static Map getAllStatistics() {
* @param p Path for which delegations tokens are requested.
* @param renewer the account name that is allowed to renew the token.
* @return List of delegation tokens.
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
@InterfaceAudience.LimitedPrivate( { "HDFS", "MapReduce" })
public List<Token<?>> getDelegationTokens(
@@ -2537,7 +2579,7 @@ public AclStatus next(final AbstractFileSystem fs, final Path p)
* @param path Path to modify
* @param name xattr name.
* @param value xattr value.
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
public void setXAttr(Path path, String name, byte[] value)
throws IOException {
@@ -2556,7 +2598,7 @@ public void setXAttr(Path path, String name, byte[] value)
* @param name xattr name.
* @param value xattr value.
* @param flag xattr set flag
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
public void setXAttr(Path path, final String name, final byte[] value,
final EnumSet<XAttrSetFlag> flag) throws IOException {
@@ -2581,7 +2623,7 @@ public Void next(final AbstractFileSystem fs, final Path p)
* @param path Path to get extended attribute
* @param name xattr name.
* @return byte[] xattr value.
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
public byte[] getXAttr(Path path, final String name) throws IOException {
final Path absF = fixRelativePart(path);
@@ -2604,7 +2646,7 @@ public byte[] next(final AbstractFileSystem fs, final Path p)
* @param path Path to get extended attributes
* @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs
* of the file or directory
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
public Map<String, byte[]> getXAttrs(Path path) throws IOException {
final Path absF = fixRelativePart(path);
@@ -2628,7 +2670,7 @@ public Map next(final AbstractFileSystem fs, final Path p)
* @param names XAttr names.
* @return Map{@literal <}String, byte[]{@literal >} describing the XAttrs
* of the file or directory
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
public Map<String, byte[]> getXAttrs(Path path, final List<String> names)
throws IOException {
@@ -2651,7 +2693,7 @@ public Map next(final AbstractFileSystem fs, final Path p)
*
* @param path Path to remove extended attribute
* @param name xattr name
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
public void removeXAttr(Path path, final String name) throws IOException {
final Path absF = fixRelativePart(path);
@@ -2675,7 +2717,7 @@ public Void next(final AbstractFileSystem fs, final Path p)
* @param path Path to get extended attributes
* @return List{@literal <}String{@literal >} of the XAttr names of the
* file or directory
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
public List<String> listXAttrs(Path path) throws IOException {
final Path absF = fixRelativePart(path);
@@ -2792,7 +2834,7 @@ public Void next(final AbstractFileSystem fs, final Path p)
/**
* Set the source path to satisfy storage policy.
* @param path The source path referring to either a directory or a file.
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
public void satisfyStoragePolicy(final Path path)
throws IOException {
@@ -2814,6 +2856,7 @@ public Void next(final AbstractFileSystem fs, final Path p)
* @param policyName the name of the target storage policy. The list
* of supported Storage policies can be retrieved
* via {@link #getAllStoragePolicies}.
+ * @throws IOException If an I/O error occurred.
*/
public void setStoragePolicy(final Path path, final String policyName)
throws IOException {
@@ -2831,7 +2874,7 @@ public Void next(final AbstractFileSystem fs, final Path p)
/**
* Unset the storage policy set for a given file or directory.
* @param src file or directory path.
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
public void unsetStoragePolicy(final Path src) throws IOException {
final Path absF = fixRelativePart(src);
@@ -2850,7 +2893,7 @@ public Void next(final AbstractFileSystem fs, final Path p)
*
* @param path file or directory path.
* @return storage policy for give file.
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
public BlockStoragePolicySpi getStoragePolicy(Path path) throws IOException {
final Path absF = fixRelativePart(path);
@@ -2868,7 +2911,7 @@ public BlockStoragePolicySpi next(final AbstractFileSystem fs,
* Retrieve all the storage policies supported by this file system.
*
* @return all storage policies supported by this filesystem.
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
public Collection<? extends BlockStoragePolicySpi> getAllStoragePolicies()
throws IOException {
@@ -2930,9 +2973,11 @@ public CompletableFuture build() throws IOException {
final Path absF = fixRelativePart(getPath());
OpenFileParameters parameters = new OpenFileParameters()
.withMandatoryKeys(getMandatoryKeys())
+ .withOptionalKeys(getOptionalKeys())
.withOptions(getOptions())
- .withBufferSize(getBufferSize())
- .withStatus(getStatus());
+ .withStatus(getStatus())
+ .withBufferSize(
+ getOptions().getInt(FS_OPTION_OPENFILE_BUFFER_SIZE, getBufferSize()));
return new FSLinkResolver<CompletableFuture<FSDataInputStream>>() {
@Override
public CompletableFuture<FSDataInputStream> next(
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileEncryptionInfo.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileEncryptionInfo.java
index 9260b9a62c62e..f50c06cec3810 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileEncryptionInfo.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileEncryptionInfo.java
@@ -52,6 +52,7 @@ public class FileEncryptionInfo implements Serializable {
* @param keyName name of the key used for the encryption zone
* @param ezKeyVersionName name of the KeyVersion used to encrypt the
* encrypted data encryption key.
+ * @param version version.
*/
public FileEncryptionInfo(final CipherSuite suite,
final CryptoProtocolVersion version, final byte[] edek,
@@ -134,6 +135,8 @@ public String toString() {
*
* NOTE:
* Currently this method is used by CLI for backward compatibility.
+ *
+ * @return stable string.
*/
public String toStringStable() {
StringBuilder builder = new StringBuilder("{")
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileRange.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileRange.java
new file mode 100644
index 0000000000000..97da65585d6d2
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileRange.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs;
+
+import java.nio.ByteBuffer;
+import java.util.concurrent.CompletableFuture;
+
+import org.apache.hadoop.fs.impl.FileRangeImpl;
+
+/**
+ * A byte range of a file.
+ * This is used for the asynchronous gather read API of
+ * {@link PositionedReadable#readVectored}.
+ */
+public interface FileRange {
+
+ /**
+ * Get the starting offset of the range.
+ * @return the byte offset of the start
+ */
+ long getOffset();
+
+ /**
+ * Get the length of the range.
+ * @return the number of bytes in the range.
+ */
+ int getLength();
+
+ /**
+ * Get the future data for this range.
+ * @return the future for the {@link ByteBuffer} that contains the data
+ */
+ CompletableFuture<ByteBuffer> getData();
+
+ /**
+ * Set a future for this range's data.
+ * This method is called by {@link PositionedReadable#readVectored} to store the
+ * data for the user to pick up later via {@link #getData}.
+ * @param data the future of the ByteBuffer that will have the data
+ */
+ void setData(CompletableFuture<ByteBuffer> data);
+
+ /**
+ * Get any reference passed in to the file range constructor.
+ * This is not used by any implementation code; it is to help
+ * bind this API to libraries retrieving multiple stripes of
+ * data in parallel.
+ * @return a reference or null.
+ */
+ Object getReference();
+
+ /**
+ * Factory method to create a FileRange object.
+ * @param offset starting offset of the range.
+ * @param length length of the range.
+ * @return a new instance of FileRangeImpl.
+ */
+ static FileRange createFileRange(long offset, int length) {
+ return new FileRangeImpl(offset, length, null);
+ }
+
+ /**
+ * Factory method to create a FileRange object.
+ * @param offset starting offset of the range.
+ * @param length length of the range.
+ * @param reference nullable reference to store in the range.
+ * @return a new instance of FileRangeImpl.
+ */
+ static FileRange createFileRange(long offset, int length, Object reference) {
+ return new FileRangeImpl(offset, length, reference);
+ }
+}
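A hedged consumer-side sketch of the vectored read API defined by FileRange and PositionedReadable#readVectored (not part of the patch; the offsets, lengths and allocator are illustrative). The caller builds the ranges, hands them to readVectored(), then awaits each range's future:

import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileRange;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import static org.apache.hadoop.util.functional.FutureIO.awaitFuture;

public class VectoredReadSketch {
  public static void read(FileSystem fs, Path path) throws java.io.IOException {
    List<FileRange> ranges = Arrays.asList(
        FileRange.createFileRange(0, 4096),           // first 4 KiB
        FileRange.createFileRange(1_000_000, 8192));  // a later 8 KiB stripe
    try (FSDataInputStream in = fs.open(path)) {
      in.readVectored(ranges, ByteBuffer::allocate);  // ranges may be combined or reordered internally
      for (FileRange r : ranges) {
        ByteBuffer data = awaitFuture(r.getData());   // blocks until that range has been read
        // process data ...
      }
    }
  }
}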
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java
index d7ca8f172f8e2..fcef578b072f1 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileStatus.java
@@ -116,6 +116,17 @@ public FileStatus(long length, boolean isdir, int block_replication,
/**
* Constructor for file systems on which symbolic links are not supported
+ *
+ * @param length length.
+ * @param isdir isdir.
+ * @param block_replication block replication.
+ * @param blocksize block size.
+ * @param modification_time modification time.
+ * @param access_time access_time.
+ * @param permission permission.
+ * @param owner owner.
+ * @param group group.
+ * @param path the path.
*/
public FileStatus(long length, boolean isdir,
int block_replication,
@@ -182,6 +193,7 @@ public FileStatus(long length, boolean isdir, int block_replication,
* Copy constructor.
*
* @param other FileStatus to copy
+ * @throws IOException raised on errors performing I/O.
*/
public FileStatus(FileStatus other) throws IOException {
// It's important to call the getters here instead of directly accessing the
@@ -375,6 +387,8 @@ protected void setGroup(String group) {
/**
* @return The contents of the symbolic link.
+ *
+ * @throws IOException raised on errors performing I/O.
*/
public Path getSymlink() throws IOException {
if (!isSymlink()) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
index fdb1a47552025..df8530784615d 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystem.java
@@ -21,7 +21,6 @@
import java.io.Closeable;
import java.io.FileNotFoundException;
import java.io.IOException;
-import java.io.InterruptedIOException;
import java.lang.ref.WeakReference;
import java.lang.ref.ReferenceQueue;
import java.net.URI;
@@ -88,6 +87,7 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_BUFFER_SIZE;
import static org.apache.hadoop.util.Preconditions.checkArgument;
import static org.apache.hadoop.fs.CommonConfigurationKeysPublic.*;
import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
@@ -103,13 +103,13 @@
* All user code that may potentially use the Hadoop Distributed
* File System should be written to use a FileSystem object or its
* successor, {@link FileContext}.
- *
+ *
*
* The local implementation is {@link LocalFileSystem} and distributed
* implementation is DistributedFileSystem. There are other implementations
* for object stores and (outside the Apache Hadoop codebase),
* third party filesystems.
- *
+ *
* Notes
*
*
The behaviour of the filesystem is
@@ -132,13 +132,12 @@
* New methods may be marked as Unstable or Evolving for their initial release,
* as a warning that they are new and may change based on the
* experience of use in applications.
- *
+ *
* Important note for developers
- *
+ *
* If you are making changes here to the public API or protected methods,
* you must review the following subclasses and make sure that
* they are filtering/passing through new methods as appropriate.
- *
*
* {@link FilterFileSystem}: methods are passed through. If not,
* then {@code TestFilterFileSystem.MustNotImplement} must be
@@ -147,21 +146,22 @@
* {@link #hasPathCapability(Path, String)} then
* {@link FilterFileSystem#hasPathCapability(Path, String)}
* must return false, always.
- *
+ *
* {@link ChecksumFileSystem}: checksums are created and
* verified.
- *
+ *
* {@code TestHarFileSystem} will need its {@code MustNotImplement}
* interface updated.
- *
*
+ *
* There are some external places your changes will break things.
* Do co-ordinate changes here.
- *
+ *
*
* HBase: HBoss
- *
+ *
* Hive: HiveShim23
+ *
* {@code shims/0.23/src/main/java/org/apache/hadoop/hive/shims/Hadoop23Shims.java}
*
*****************************************************************/
@@ -280,6 +280,8 @@ public FileSystem run() throws IOException {
/**
* Returns the configured FileSystem implementation.
* @param conf the configuration to use
+ * @return FileSystem.
+ * @throws IOException If an I/O error occurred.
*/
public static FileSystem get(Configuration conf) throws IOException {
return get(getDefaultUri(conf), conf);
@@ -374,6 +376,7 @@ public String getScheme() {
* implement that method.
*
* @see #canonicalizeUri(URI)
+ * @return the URI of this filesystem.
*/
protected URI getCanonicalUri() {
return canonicalizeUri(getUri());
@@ -390,6 +393,7 @@ protected URI getCanonicalUri() {
* not specified and if {@link #getDefaultPort()} returns a
* default port.
*
+ * @param uri the URI.
* @return URI
* @see NetUtils#getCanonicalUri(URI, int)
*/
@@ -453,11 +457,21 @@ public String getCanonicalServiceName() {
: null;
}
- /** @deprecated call {@link #getUri()} instead.*/
+ /**
+ * @return the URI as a string.
+ * @deprecated call {@link #getUri()} instead.
+ */
@Deprecated
public String getName() { return getUri().toString(); }
- /** @deprecated call {@link #get(URI, Configuration)} instead. */
+ /**
+ * @deprecated call {@link #get(URI, Configuration)} instead.
+ *
+ * @param name name.
+ * @param conf configuration.
+ * @return file system.
+ * @throws IOException If an I/O error occurred.
+ */
@Deprecated
public static FileSystem getNamed(String name, Configuration conf)
throws IOException {
@@ -512,6 +526,9 @@ public static LocalFileSystem getLocal(Configuration conf)
* configuration and URI, cached and returned to the caller.
*
*
+ * @param uri uri of the filesystem.
+ * @param conf configuration.
+ * @return filesystem instance.
* @throws IOException if the FileSystem cannot be instantiated.
*/
public static FileSystem get(URI uri, Configuration conf) throws IOException {
@@ -541,7 +558,7 @@ public static FileSystem get(URI uri, Configuration conf) throws IOException {
/**
* Returns the FileSystem for this URI's scheme and authority and the
* given user. Internally invokes {@link #newInstance(URI, Configuration)}
- * @param uri of the filesystem
+ * @param uri uri of the filesystem.
* @param conf the configuration to use
* @param user to perform the get as
* @return filesystem instance
@@ -859,6 +876,7 @@ protected void checkPath(Path path) {
* @param start offset into the given file
* @param len length for which to get locations for
* @throws IOException IO failure
+ * @return block location array.
*/
public BlockLocation[] getFileBlockLocations(FileStatus file,
long start, long len) throws IOException {
@@ -899,6 +917,7 @@ public BlockLocation[] getFileBlockLocations(FileStatus file,
* @param len length for which to get locations for
* @throws FileNotFoundException when the path does not exist
* @throws IOException IO failure
+ * @return block location array.
*/
public BlockLocation[] getFileBlockLocations(Path p,
long start, long len) throws IOException {
@@ -961,6 +980,7 @@ public Path resolvePath(final Path p) throws IOException {
* @param f the file name to open
* @param bufferSize the size of the buffer to be used.
* @throws IOException IO failure
+ * @return input stream.
*/
public abstract FSDataInputStream open(Path f, int bufferSize)
throws IOException;
@@ -969,6 +989,7 @@ public abstract FSDataInputStream open(Path f, int bufferSize)
* Opens an FSDataInputStream at the indicated Path.
* @param f the file to open
* @throws IOException IO failure
+ * @return input stream.
*/
public FSDataInputStream open(Path f) throws IOException {
return open(f, getConf().getInt(IO_FILE_BUFFER_SIZE_KEY,
@@ -986,6 +1007,7 @@ public FSDataInputStream open(Path f) throws IOException {
* @throws IOException IO failure
* @throws UnsupportedOperationException If {@link #open(PathHandle, int)}
* not overridden by subclass
+ * @return input stream.
*/
public FSDataInputStream open(PathHandle fd) throws IOException {
return open(fd, getConf().getInt(IO_FILE_BUFFER_SIZE_KEY,
@@ -1003,6 +1025,7 @@ public FSDataInputStream open(PathHandle fd) throws IOException {
* not satisfied
* @throws IOException IO failure
* @throws UnsupportedOperationException If not overridden by subclass
+ * @return input stream.
*/
public FSDataInputStream open(PathHandle fd, int bufferSize)
throws IOException {
@@ -1020,6 +1043,7 @@ public FSDataInputStream open(PathHandle fd, int bufferSize)
* not overridden by subclass.
* @throws UnsupportedOperationException If this FileSystem cannot enforce
* the specified constraints.
+ * @return path handle.
*/
public final PathHandle getPathHandle(FileStatus stat, HandleOpt... opt) {
// method is final with a default so clients calling getPathHandle(stat)
@@ -1035,6 +1059,7 @@ public final PathHandle getPathHandle(FileStatus stat, HandleOpt... opt) {
* @param stat Referent in the target FileSystem
* @param opt Constraints that determine the validity of the
* {@link PathHandle} reference.
+ * @return path handle.
*/
protected PathHandle createPathHandle(FileStatus stat, HandleOpt... opt) {
throw new UnsupportedOperationException();
@@ -1045,6 +1070,7 @@ protected PathHandle createPathHandle(FileStatus stat, HandleOpt... opt) {
* Files are overwritten by default.
* @param f the file to create
* @throws IOException IO failure
+ * @return output stream.
*/
public FSDataOutputStream create(Path f) throws IOException {
return create(f, true);
@@ -1056,6 +1082,7 @@ public FSDataOutputStream create(Path f) throws IOException {
* @param overwrite if a file with this name already exists, then if true,
* the file will be overwritten, and if false an exception will be thrown.
* @throws IOException IO failure
+ * @return output stream.
*/
public FSDataOutputStream create(Path f, boolean overwrite)
throws IOException {
@@ -1073,6 +1100,7 @@ public FSDataOutputStream create(Path f, boolean overwrite)
* @param f the file to create
* @param progress to report progress
* @throws IOException IO failure
+ * @return output stream.
*/
public FSDataOutputStream create(Path f, Progressable progress)
throws IOException {
@@ -1089,6 +1117,7 @@ public FSDataOutputStream create(Path f, Progressable progress)
* @param f the file to create
* @param replication the replication factor
* @throws IOException IO failure
+ * @return output stream.
*/
public FSDataOutputStream create(Path f, short replication)
throws IOException {
@@ -1107,6 +1136,7 @@ public FSDataOutputStream create(Path f, short replication)
* @param replication the replication factor
* @param progress to report progress
* @throws IOException IO failure
+ * @return output stream.
*/
public FSDataOutputStream create(Path f, short replication,
Progressable progress) throws IOException {
@@ -1124,6 +1154,7 @@ public FSDataOutputStream create(Path f, short replication,
* the file will be overwritten, and if false an error will be thrown.
* @param bufferSize the size of the buffer to be used.
* @throws IOException IO failure
+ * @return output stream.
*/
public FSDataOutputStream create(Path f,
boolean overwrite,
@@ -1143,7 +1174,9 @@ public FSDataOutputStream create(Path f,
* @param overwrite if a file with this name already exists, then if true,
* the file will be overwritten, and if false an error will be thrown.
* @param bufferSize the size of the buffer to be used.
+ * @param progress to report progress.
* @throws IOException IO failure
+ * @return output stream.
*/
public FSDataOutputStream create(Path f,
boolean overwrite,
@@ -1163,7 +1196,9 @@ public FSDataOutputStream create(Path f,
* the file will be overwritten, and if false an error will be thrown.
* @param bufferSize the size of the buffer to be used.
* @param replication required block replication for the file.
+ * @param blockSize the block size to be used.
* @throws IOException IO failure
+ * @return output stream.
*/
public FSDataOutputStream create(Path f,
boolean overwrite,
@@ -1181,7 +1216,10 @@ public FSDataOutputStream create(Path f,
* the file will be overwritten, and if false an error will be thrown.
* @param bufferSize the size of the buffer to be used.
* @param replication required block replication for the file.
+ * @param blockSize the block size to be used.
+ * @param progress to report progress.
* @throws IOException IO failure
+ * @return output stream.
*/
public FSDataOutputStream create(Path f,
boolean overwrite,
@@ -1208,6 +1246,7 @@ public FSDataOutputStream create(Path f,
* @param progress the progress reporter
* @throws IOException IO failure
* @see #setPermission(Path, FsPermission)
+ * @return output stream.
*/
public abstract FSDataOutputStream create(Path f,
FsPermission permission,
@@ -1229,6 +1268,7 @@ public abstract FSDataOutputStream create(Path f,
* @param progress the progress reporter
* @throws IOException IO failure
* @see #setPermission(Path, FsPermission)
+ * @return output stream.
*/
public FSDataOutputStream create(Path f,
FsPermission permission,
@@ -1255,6 +1295,7 @@ public FSDataOutputStream create(Path f,
* found in conf will be used.
* @throws IOException IO failure
* @see #setPermission(Path, FsPermission)
+ * @return output stream.
*/
public FSDataOutputStream create(Path f,
FsPermission permission,
@@ -1276,6 +1317,16 @@ public FSDataOutputStream create(Path f,
* the permission with umask before calling this method.
* This a temporary method added to support the transition from FileSystem
* to FileContext for user applications.
+ *
+ * @param f path.
+ * @param absolutePermission permission.
+ * @param flag create flag.
+ * @param bufferSize buffer size.
+ * @param replication replication.
+ * @param blockSize block size.
+ * @param progress progress.
+ * @param checksumOpt checksum option.
+ * @return output stream.
* @throws IOException IO failure
*/
@Deprecated
@@ -1330,6 +1381,11 @@ protected boolean primitiveMkdir(Path f, FsPermission absolutePermission)
* with umask before calling this method.
* This a temporary method added to support the transition from FileSystem
* to FileContext for user applications.
+ *
+ * @param f the path.
+ * @param absolutePermission permission.
+ * @param createParent create parent.
+ * @throws IOException IO failure.
*/
@Deprecated
protected void primitiveMkdir(Path f, FsPermission absolutePermission,
@@ -1369,6 +1425,7 @@ protected void primitiveMkdir(Path f, FsPermission absolutePermission,
* @param progress the progress reporter
* @throws IOException IO failure
* @see #setPermission(Path, FsPermission)
+ * @return output stream.
*/
public FSDataOutputStream createNonRecursive(Path f,
boolean overwrite,
@@ -1392,6 +1449,7 @@ public FSDataOutputStream createNonRecursive(Path f,
* @param progress the progress reporter
* @throws IOException IO failure
* @see #setPermission(Path, FsPermission)
+ * @return output stream.
*/
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
boolean overwrite, int bufferSize, short replication, long blockSize,
@@ -1415,6 +1473,7 @@ public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
* @param progress the progress reporter
* @throws IOException IO failure
* @see #setPermission(Path, FsPermission)
+ * @return output stream.
*/
public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
EnumSet<CreateFlag> flags, int bufferSize, short replication, long blockSize,
@@ -1429,6 +1488,7 @@ public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
* Important: the default implementation is not atomic
* @param f path to use for create
* @throws IOException IO failure
+ * @return true if the file was created, false otherwise.
*/
public boolean createNewFile(Path f) throws IOException {
if (exists(f)) {
@@ -1449,6 +1509,7 @@ public boolean createNewFile(Path f) throws IOException {
* @throws IOException IO failure
* @throws UnsupportedOperationException if the operation is unsupported
* (default).
+ * @return output stream.
*/
public FSDataOutputStream append(Path f) throws IOException {
return append(f, getConf().getInt(IO_FILE_BUFFER_SIZE_KEY,
@@ -1463,6 +1524,7 @@ public FSDataOutputStream append(Path f) throws IOException {
* @throws IOException IO failure
* @throws UnsupportedOperationException if the operation is unsupported
* (default).
+ * @return output stream.
*/
public FSDataOutputStream append(Path f, int bufferSize) throws IOException {
return append(f, bufferSize, null);
@@ -1476,10 +1538,44 @@ public FSDataOutputStream append(Path f, int bufferSize) throws IOException {
* @throws IOException IO failure
* @throws UnsupportedOperationException if the operation is unsupported
* (default).
+ * @return output stream.
*/
public abstract FSDataOutputStream append(Path f, int bufferSize,
Progressable progress) throws IOException;
+ /**
+ * Append to an existing file (optional operation).
+ * @param f the existing file to be appended.
+ * @param appendToNewBlock whether to append data to a new block
+ * instead of the end of the last partial block
+ * @throws IOException IO failure
+ * @throws UnsupportedOperationException if the operation is unsupported
+ * (default).
+ * @return output stream.
+ */
+ public FSDataOutputStream append(Path f, boolean appendToNewBlock) throws IOException {
+ return append(f, getConf().getInt(IO_FILE_BUFFER_SIZE_KEY,
+ IO_FILE_BUFFER_SIZE_DEFAULT), null, appendToNewBlock);
+ }
+
+ /**
+ * Append to an existing file (optional operation).
+ * This overload exists so that file systems such as DistributedFileSystem can override it.
+ * @param f the existing file to be appended.
+ * @param bufferSize the size of the buffer to be used.
+ * @param progress for reporting progress if it is not null.
+ * @param appendToNewBlock whether to append data to a new block
+ * instead of the end of the last partial block
+ * @throws IOException IO failure
+ * @throws UnsupportedOperationException if the operation is unsupported
+ * (default).
+ * @return output stream.
+ */
+ public FSDataOutputStream append(Path f, int bufferSize,
+ Progressable progress, boolean appendToNewBlock) throws IOException {
+ return append(f, bufferSize, progress);
+ }
+
/**
* Concat existing files together.
* @param trg the path to the target destination.
@@ -1514,7 +1610,7 @@ public short getReplication(Path src) throws IOException {
* This is the default behavior.
* @param src file name
* @param replication new replication
- * @throws IOException
+ * @throws IOException an IO failure.
* @return true if successful, or the feature in unsupported;
* false if replication is supported but the file does not exist,
* or is a directory
@@ -1543,11 +1639,12 @@ public boolean setReplication(Path src, short replication)
*
* If OVERWRITE option is not passed as an argument, rename fails
* if the dst already exists.
+ *
*
* If OVERWRITE option is passed as an argument, rename overwrites
* the dst if it is a file or an empty directory. Rename fails if dst is
* a non-empty directory.
- *
+ *
* Note that atomicity of rename is dependent on the file system
* implementation. Please refer to the file system documentation for
* details. This default implementation is non atomic.
@@ -1555,9 +1652,11 @@ public boolean setReplication(Path src, short replication)
* This method is deprecated since it is a temporary method added to
* support the transition from FileSystem to FileContext for user
* applications.
+ *
*
* @param src path to be renamed
* @param dst new path after rename
+ * @param options rename options.
* @throws FileNotFoundException src path does not exist, or the parent
* path of dst does not exist.
* @throws FileAlreadyExistsException dest path exists and is a file
@@ -1652,6 +1751,9 @@ public boolean truncate(Path f, long newLength) throws IOException {
/**
* Delete a file/directory.
+ * @param f the path.
+ * @throws IOException IO failure.
+ * @return true if the delete succeeded, false otherwise.
* @deprecated Use {@link #delete(Path, boolean)} instead.
*/
@Deprecated
@@ -1768,6 +1870,7 @@ public boolean exists(Path f) throws IOException {
* @param f path to check
* @throws IOException IO failure
* @deprecated Use {@link #getFileStatus(Path)} instead
+ * @return true if f is a directory, false otherwise.
*/
@Deprecated
public boolean isDirectory(Path f) throws IOException {
@@ -1785,6 +1888,7 @@ public boolean isDirectory(Path f) throws IOException {
* @param f path to check
* @throws IOException IO failure
* @deprecated Use {@link #getFileStatus(Path)} instead
+ * @return true if f is a file, false otherwise.
*/
@Deprecated
public boolean isFile(Path f) throws IOException {
@@ -1797,6 +1901,7 @@ public boolean isFile(Path f) throws IOException {
/**
* The number of bytes in a file.
+ * @param f the path.
* @return the number of bytes; 0 for a directory
* @deprecated Use {@link #getFileStatus(Path)} instead.
* @throws FileNotFoundException if the path does not resolve
@@ -1811,6 +1916,7 @@ public long getLength(Path f) throws IOException {
* @param f path to use
* @throws FileNotFoundException if the path does not resolve
* @throws IOException IO failure
+ * @return content summary.
*/
public ContentSummary getContentSummary(Path f) throws IOException {
FileStatus status = getFileStatus(f);
@@ -1945,9 +2051,9 @@ public boolean hasMore() {
* @param f Path to list
* @param token opaque iteration token returned by previous call, or null
* if this is the first call.
- * @return
- * @throws FileNotFoundException
- * @throws IOException
+ * @return directory entries.
+ * @throws FileNotFoundException when the path does not exist.
+ * @throws IOException If an I/O error occurred.
*/
@InterfaceAudience.Private
protected DirectoryEntries listStatusBatch(Path f, byte[] token) throws
@@ -1978,6 +2084,8 @@ private void listStatus(ArrayList<FileStatus> results, Path f,
/**
* List corrupted file blocks.
+ *
+ * @param path the path.
* @return an iterator over the corrupt files under the given path
* (may contain duplicates if a file has more than one corrupt block)
* @throws UnsupportedOperationException if the operation is unsupported
@@ -2071,36 +2179,29 @@ public FileStatus[] listStatus(Path[] files, PathFilter filter)
 *   ?
 *   Matches any single character.
 *
- *
 *   *
 *   Matches zero or more characters.
 *
- *
 *   [abc]
 *   Matches a single character from character set
 *    {a,b,c}.
 *
- *
 *   [a-b]
 *   Matches a single character from the character range
 *    {a...b}. Note that character a must be
 *    lexicographically less than or equal to character b.
 *
- *
 *   [^a]
 *   Matches a single character that is not from character set or range
 *    {a}. Note that the ^ character must occur
 *    immediately to the right of the opening bracket.
 *
- *
 *   \c
 *   Removes (escapes) any special meaning of character c.
 *
- *
 *   {ab,cd}
 *   Matches a string from the string set {ab, cd}
 *
- *
 *   {ab,c{de,fh}}
 *   Matches a string from the string set {ab, cde, cfh}
 *
@@ -2331,6 +2432,7 @@ public LocatedFileStatus next() throws IOException {
/** Return the current user's home directory in this FileSystem.
* The default implementation returns {@code "/user/$USER/"}.
+ * @return the path.
*/
public Path getHomeDirectory() {
String username;
@@ -2393,6 +2495,7 @@ public boolean mkdirs(Path f) throws IOException {
* @param f path to create
* @param permission to apply to f
* @throws IOException IO failure
+ * @return true if the directory was created, false otherwise.
*/
public abstract boolean mkdirs(Path f, FsPermission permission
) throws IOException;
@@ -2440,6 +2543,7 @@ public void moveFromLocalFile(Path src, Path dst)
* @param delSrc whether to delete the src
* @param src path
* @param dst path
+ * @throws IOException IO failure.
*/
public void copyFromLocalFile(boolean delSrc, Path src, Path dst)
throws IOException {
@@ -2554,6 +2658,7 @@ public void copyToLocalFile(boolean delSrc, Path src, Path dst,
* @param fsOutputFile path of output file
* @param tmpLocalFile path of local tmp file
* @throws IOException IO failure
+ * @return the path.
*/
public Path startLocalOutput(Path fsOutputFile, Path tmpLocalFile)
throws IOException {
@@ -2601,6 +2706,7 @@ public void close() throws IOException {
/**
* Return the total size of all files in the filesystem.
* @throws IOException IO failure
+ * @return the total size, in bytes, of all files in the filesystem.
*/
public long getUsed() throws IOException {
Path path = new Path("/");
@@ -2609,7 +2715,9 @@ public long getUsed() throws IOException {
/**
* Return the total size of all files from a specified path.
+ * @param path the path.
* @throws IOException IO failure
+ * @return the total size, in bytes, of all files under the given path.
*/
public long getUsed(Path path) throws IOException {
return getContentSummary(path).getLength();
@@ -2632,6 +2740,7 @@ public long getBlockSize(Path f) throws IOException {
* Return the number of bytes that large input files should be optimally
* be split into to minimize I/O time.
* @deprecated use {@link #getDefaultBlockSize(Path)} instead
+ * @return default block size.
*/
@Deprecated
public long getDefaultBlockSize() {
@@ -2684,8 +2793,8 @@ public short getDefaultReplication(Path path) {
* In some FileSystem implementations such as HDFS metadata
* synchronization is essential to guarantee consistency of read requests
* particularly in HA setting.
- * @throws IOException
- * @throws UnsupportedOperationException
+ * @throws IOException If an I/O error occurred.
+ * @throws UnsupportedOperationException if the operation is unsupported.
*/
public void msync() throws IOException, UnsupportedOperationException {
throw new UnsupportedOperationException(getClass().getCanonicalName() +
@@ -2761,6 +2870,8 @@ static void checkAccessPermissions(FileStatus stat, FsAction mode)
/**
* See {@link FileContext#fixRelativePart}.
+ * @param p the path.
+ * @return the path, made absolute if it was relative.
*/
protected Path fixRelativePart(Path p) {
if (p.isUriPathAbsolute()) {
@@ -2772,6 +2883,18 @@ protected Path fixRelativePart(Path p) {
/**
* See {@link FileContext#createSymlink(Path, Path, boolean)}.
+ *
+ * @param target target path.
+ * @param link link.
+ * @param createParent create parent.
+ * @throws AccessControlException if access is denied.
+ * @throws FileAlreadyExistsException if the link already exists.
+ * @throws FileNotFoundException when the path does not exist.
+ * @throws ParentNotDirectoryException if the parent path of dest is not
+ * a directory.
+ * @throws UnsupportedFileSystemException if there was no known implementation
+ * for the scheme.
+ * @throws IOException raised on errors performing I/O.
*/
public void createSymlink(final Path target, final Path link,
final boolean createParent) throws AccessControlException,
@@ -2785,8 +2908,14 @@ public void createSymlink(final Path target, final Path link,
/**
* See {@link FileContext#getFileLinkStatus(Path)}.
- * @throws FileNotFoundException when the path does not exist
- * @throws IOException see specific implementation
+ *
+ * @param f the path.
+ * @throws AccessControlException if access is denied.
+ * @throws FileNotFoundException when the path does not exist.
+ * @throws IOException raised on errors performing I/O.
+ * @throws UnsupportedFileSystemException if there was no known implementation
+ * for the scheme.
+ * @return file status
*/
public FileStatus getFileLinkStatus(final Path f)
throws AccessControlException, FileNotFoundException,
@@ -2797,6 +2926,7 @@ public FileStatus getFileLinkStatus(final Path f)
/**
* See {@link AbstractFileSystem#supportsSymlinks()}.
+ * @return true if the file system supports symlinks, false otherwise.
*/
public boolean supportsSymlinks() {
return false;
@@ -2804,8 +2934,11 @@ public boolean supportsSymlinks() {
/**
* See {@link FileContext#getLinkTarget(Path)}.
+ * @param f the path.
* @throws UnsupportedOperationException if the operation is unsupported
* (default outcome).
+ * @throws IOException IO failure.
+ * @return the target of the symbolic link.
*/
public Path getLinkTarget(Path f) throws IOException {
// Supporting filesystems should override this method
@@ -2815,8 +2948,11 @@ public Path getLinkTarget(Path f) throws IOException {
/**
* See {@link AbstractFileSystem#getLinkTarget(Path)}.
+ * @param f the path.
* @throws UnsupportedOperationException if the operation is unsupported
* (default outcome).
+ * @throws IOException IO failure.
+ * @return the resolved target path of the link.
*/
protected Path resolveLink(Path f) throws IOException {
// Supporting filesystems should override this method
@@ -3220,7 +3356,7 @@ public void removeXAttr(Path path, String name) throws IOException {
/**
* Set the source path to satisfy storage policy.
* @param path The source path referring to either a directory or a file.
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
public void satisfyStoragePolicy(final Path path) throws IOException {
throw new UnsupportedOperationException(
@@ -3528,7 +3664,7 @@ FileSystem getUnique(URI uri, Configuration conf) throws IOException{
* @param conf configuration
* @param key key to store/retrieve this FileSystem in the cache
* @return a cached or newly instantiated FileSystem.
- * @throws IOException
+ * @throws IOException If an I/O error occurred.
*/
private FileSystem getInternal(URI uri, Configuration conf, Key key)
throws IOException{
@@ -3543,11 +3679,7 @@ private FileSystem getInternal(URI uri, Configuration conf, Key key)
// to construct an instance.
try (DurationInfo d = new DurationInfo(LOGGER, false,
"Acquiring creator semaphore for %s", uri)) {
- creatorPermits.acquire();
- } catch (InterruptedException e) {
- // acquisition was interrupted; convert to an IOE.
- throw (IOException)new InterruptedIOException(e.toString())
- .initCause(e);
+ creatorPermits.acquireUninterruptibly();
}
FileSystem fsToClose = null;
try {
@@ -4023,6 +4155,7 @@ public void run() {
/**
* Get or create the thread-local data associated with the current thread.
+ * @return statistics data.
*/
public StatisticsData getThreadStatistics() {
StatisticsData data = threadData.get();
@@ -4381,6 +4514,7 @@ public static synchronized Map<String, Statistics> getStatistics() {
/**
* Return the FileSystem classes that have Statistics.
* @deprecated use {@link #getGlobalStorageStatistics()}
+ * @return the list of per-filesystem statistics.
*/
@Deprecated
public static synchronized List<Statistics> getAllStatistics() {
@@ -4389,6 +4523,7 @@ public static synchronized List getAllStatistics() {
/**
* Get the statistics for a particular file system.
+ * @param scheme scheme.
* @param cls the class to lookup
* @return a statistics object
* @deprecated use {@link #getGlobalStorageStatistics()}
@@ -4423,6 +4558,7 @@ public static synchronized void clearStatistics() {
/**
* Print all statistics for all file systems to {@code System.out}
+ * @throws IOException If an I/O error occurred.
*/
public static synchronized
void printStatistics() throws IOException {
@@ -4463,6 +4599,7 @@ public StorageStatistics getStorageStatistics() {
/**
* Get the global storage statistics.
+ * @return global storage statistics.
*/
public static GlobalStorageStatistics getGlobalStorageStatistics() {
return GlobalStorageStatistics.INSTANCE;
@@ -4616,7 +4753,7 @@ protected CompletableFuture<FSDataInputStream> openFileWithOptions(
final OpenFileParameters parameters) throws IOException {
AbstractFSBuilderImpl.rejectUnknownMandatoryKeys(
parameters.getMandatoryKeys(),
- Collections.emptySet(),
+ Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS,
"for " + path);
return LambdaUtils.eval(
new CompletableFuture<>(), () ->
@@ -4644,7 +4781,7 @@ protected CompletableFuture<FSDataInputStream> openFileWithOptions(
final OpenFileParameters parameters) throws IOException {
AbstractFSBuilderImpl.rejectUnknownMandatoryKeys(
parameters.getMandatoryKeys(),
- Collections.emptySet(), "");
+ Options.OpenFileOptions.FS_OPTION_OPENFILE_STANDARD_OPTIONS, "");
CompletableFuture<FSDataInputStream> result = new CompletableFuture<>();
try {
result.complete(open(pathHandle, parameters.getBufferSize()));
@@ -4751,9 +4888,11 @@ public CompletableFuture<FSDataInputStream> build() throws IOException {
Optional<Path> optionalPath = getOptionalPath();
OpenFileParameters parameters = new OpenFileParameters()
.withMandatoryKeys(getMandatoryKeys())
+ .withOptionalKeys(getOptionalKeys())
.withOptions(getOptions())
- .withBufferSize(getBufferSize())
- .withStatus(super.getStatus()); // explicit to avoid IDE warnings
+ .withStatus(super.getStatus())
+ .withBufferSize(
+ getOptions().getInt(FS_OPTION_OPENFILE_BUFFER_SIZE, getBufferSize()));
if(optionalPath.isPresent()) {
return getFS().openFileWithOptions(optionalPath.get(),
parameters);
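Editor's illustration (not part of the patch): a minimal caller-side sketch of the openFile() builder, whose standard options this hunk now passes through to openFileWithOptions() instead of rejecting them as unknown mandatory keys. The FileSystem instance `fs` and the path are assumptions for illustration; the constants and helper are the ones this patch already imports elsewhere.

    // Static imports assumed (they appear in the FileUtil changes below):
    //   org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY
    //   org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE
    //   org.apache.hadoop.util.functional.FutureIO.awaitFuture
    FSDataInputStream in = awaitFuture(
        fs.openFile(new Path("/tmp/example.txt"))            // hypothetical path
            .opt(FS_OPTION_OPENFILE_READ_POLICY,
                FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE)   // read-policy hint
            .build());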
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemLinkResolver.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemLinkResolver.java
index 7eec0eb7cec54..593495a1daa88 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemLinkResolver.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileSystemLinkResolver.java
@@ -38,8 +38,8 @@ public abstract class FileSystemLinkResolver {
* an UnresolvedLinkException if called on an unresolved {@link Path}.
* @param p Path on which to perform an operation
* @return Generic type returned by operation
- * @throws IOException
- * @throws UnresolvedLinkException
+ * @throws IOException raised on errors performing I/O.
+ * @throws UnresolvedLinkException unresolved link exception.
*/
abstract public T doCall(final Path p) throws IOException,
UnresolvedLinkException;
@@ -54,7 +54,7 @@ abstract public T doCall(final Path p) throws IOException,
* @param p
* Resolved Target of path
* @return Generic type determined by implementation
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
abstract public T next(final FileSystem fs, final Path p) throws IOException;
@@ -66,7 +66,7 @@ abstract public T doCall(final Path p) throws IOException,
* @param filesys FileSystem with which to try call
* @param path Path with which to try call
* @return Generic type determined by implementation
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public T resolve(final FileSystem filesys, final Path path)
throws IOException {
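Editor's illustration (not part of the patch): a hedged sketch of the doCall()/next() contract documented above, as a subclass might use it. The starting filesystem `dfs` and the getFileStatus call are assumptions chosen only to show the pattern.

    // doCall() tries the operation on the unresolved path; next() retries it on
    // the filesystem that the unresolved symlink points to.
    FileStatus status = new FileSystemLinkResolver<FileStatus>() {
      @Override
      public FileStatus doCall(final Path p)
          throws IOException, UnresolvedLinkException {
        return dfs.getFileStatus(p);          // may throw UnresolvedLinkException
      }
      @Override
      public FileStatus next(final FileSystem fs, final Path p)
          throws IOException {
        return fs.getFileStatus(p);           // retry on the resolved filesystem
      }
    }.resolve(dfs, path);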
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java
index 3c88e5d21bf6b..2af0a7b9e742f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FileUtil.java
@@ -36,13 +36,18 @@
import java.nio.charset.CharsetEncoder;
import java.nio.charset.StandardCharsets;
import java.nio.file.AccessDeniedException;
+import java.nio.file.attribute.PosixFilePermission;
import java.nio.file.FileSystems;
import java.nio.file.Files;
+import java.nio.file.LinkOption;
+import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.Enumeration;
+import java.util.EnumSet;
import java.util.List;
import java.util.Map;
import java.util.Objects;
+import java.util.Set;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
@@ -51,13 +56,13 @@
import java.util.jar.JarOutputStream;
import java.util.jar.Manifest;
import java.util.zip.GZIPInputStream;
-import java.util.zip.ZipEntry;
-import java.util.zip.ZipFile;
-import java.util.zip.ZipInputStream;
import org.apache.commons.collections.map.CaseInsensitiveMap;
import org.apache.commons.compress.archivers.tar.TarArchiveEntry;
import org.apache.commons.compress.archivers.tar.TarArchiveInputStream;
+import org.apache.commons.compress.archivers.zip.ZipArchiveEntry;
+import org.apache.commons.compress.archivers.zip.ZipArchiveInputStream;
+import org.apache.commons.compress.archivers.zip.ZipFile;
import org.apache.commons.io.FileUtils;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@@ -72,6 +77,11 @@
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY;
+import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH;
+import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE;
+import static org.apache.hadoop.util.functional.FutureIO.awaitFuture;
+
/**
* A collection of file-processing util methods
*/
@@ -152,6 +162,8 @@ public static void fullyDeleteOnExit(final File file) {
* (3) If dir is a normal file, it is deleted.
* (4) If dir is a normal directory, then dir and all its contents recursively
* are deleted.
+ * @param dir dir.
+ * @return true if the file or directory was fully deleted, false otherwise.
*/
public static boolean fullyDelete(final File dir) {
return fullyDelete(dir, false);
@@ -247,6 +259,9 @@ private static boolean deleteImpl(final File f, final boolean doLog) {
* we return false, the directory may be partially-deleted.
* If dir is a symlink to a directory, all the contents of the actual
* directory pointed to by dir will be deleted.
+ *
+ * @param dir dir.
+ * @return true if all content was successfully deleted, false otherwise.
*/
public static boolean fullyDeleteContents(final File dir) {
return fullyDeleteContents(dir, false);
@@ -257,8 +272,11 @@ public static boolean fullyDeleteContents(final File dir) {
* we return false, the directory may be partially-deleted.
* If dir is a symlink to a directory, all the contents of the actual
* directory pointed to by dir will be deleted.
+ *
+ * @param dir dir.
* @param tryGrantPermissions if 'true', try grant +rwx permissions to this
* and all the underlying directories before trying to delete their contents.
+ * @return true if all content was successfully deleted, false otherwise.
*/
public static boolean fullyDeleteContents(final File dir, final boolean tryGrantPermissions) {
if (tryGrantPermissions) {
@@ -301,7 +319,7 @@ public static boolean fullyDeleteContents(final File dir, final boolean tryGrant
*
* @param fs {@link FileSystem} on which the path is present
* @param dir directory to recursively delete
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
* @deprecated Use {@link FileSystem#delete(Path, boolean)}
*/
@Deprecated
@@ -333,7 +351,17 @@ private static void checkDependencies(FileSystem srcFS,
}
}
- /** Copy files between FileSystems. */
+ /**
+ * Copy files between FileSystems.
+ * @param srcFS src fs.
+ * @param src src.
+ * @param dstFS dst fs.
+ * @param dst dst.
+ * @param deleteSource delete source.
+ * @param conf configuration.
+ * @return true if the copy succeeded, false otherwise.
+ * @throws IOException raised on errors performing I/O.
+ */
public static boolean copy(FileSystem srcFS, Path src,
FileSystem dstFS, Path dst,
boolean deleteSource,
@@ -381,7 +409,19 @@ public static boolean copy(FileSystem srcFS, Path[] srcs,
return returnVal;
}
- /** Copy files between FileSystems. */
+ /**
+ * Copy files between FileSystems.
+ *
+ * @param srcFS srcFs.
+ * @param src src.
+ * @param dstFS dstFs.
+ * @param dst dst.
+ * @param deleteSource delete source.
+ * @param overwrite overwrite.
+ * @param conf configuration.
+ * @throws IOException raised on errors performing I/O.
+ * @return true if the operation succeeded.
+ */
public static boolean copy(FileSystem srcFS, Path src,
FileSystem dstFS, Path dst,
boolean deleteSource,
@@ -391,7 +431,33 @@ public static boolean copy(FileSystem srcFS, Path src,
return copy(srcFS, fileStatus, dstFS, dst, deleteSource, overwrite, conf);
}
- /** Copy files between FileSystems. */
+ /**
+ * Copy a file/directory tree within/between filesystems.
+ *
+ * returns true if the operation succeeded. When deleteSource is true,
+ * this means "after the copy, delete(source) returned true"
+ * If the destination is a directory, and mkdirs (dest) fails,
+ * the operation will return false rather than raise any exception.
+ *
+ * The overwrite flag is about overwriting files; it has no effect on an
+ * attempt to copy a file atop a directory (expect an IOException), or on
+ * copying a directory over a path which contains a file (mkdir will fail, so
+ * "false").
+ *
+ * The operation is recursive, and the deleteSource operation takes place
+ * as each subdirectory is copied. Therefore, if an operation fails partway
+ * through, the source tree may be partially deleted.
+ *
+ * @param srcFS source filesystem
+ * @param srcStatus status of source
+ * @param dstFS destination filesystem
+ * @param dst destination path
+ * @param deleteSource delete the source?
+ * @param overwrite overwrite files at destination?
+ * @param conf configuration to use when opening files
+ * @return true if the operation succeeded.
+ * @throws IOException failure
+ */
public static boolean copy(FileSystem srcFS, FileStatus srcStatus,
FileSystem dstFS, Path dst,
boolean deleteSource,
@@ -404,22 +470,27 @@ public static boolean copy(FileSystem srcFS, FileStatus srcStatus,
if (!dstFS.mkdirs(dst)) {
return false;
}
- FileStatus contents[] = srcFS.listStatus(src);
- for (int i = 0; i < contents.length; i++) {
- copy(srcFS, contents[i], dstFS,
- new Path(dst, contents[i].getPath().getName()),
- deleteSource, overwrite, conf);
+ RemoteIterator<FileStatus> contents = srcFS.listStatusIterator(src);
+ while (contents.hasNext()) {
+ FileStatus next = contents.next();
+ copy(srcFS, next, dstFS,
+ new Path(dst, next.getPath().getName()),
+ deleteSource, overwrite, conf);
}
} else {
- InputStream in=null;
+ InputStream in = null;
OutputStream out = null;
try {
- in = srcFS.open(src);
+ in = awaitFuture(srcFS.openFile(src)
+ .opt(FS_OPTION_OPENFILE_READ_POLICY,
+ FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE)
+ .opt(FS_OPTION_OPENFILE_LENGTH,
+ srcStatus.getLen()) // file length hint for object stores
+ .build());
out = dstFS.create(dst, overwrite);
IOUtils.copyBytes(in, out, conf, true);
} catch (IOException e) {
- IOUtils.closeStream(out);
- IOUtils.closeStream(in);
+ IOUtils.cleanupWithLogger(LOG, in, out);
throw e;
}
}
@@ -431,7 +502,17 @@ public static boolean copy(FileSystem srcFS, FileStatus srcStatus,
}
- /** Copy local files to a FileSystem. */
+ /**
+ * Copy local files to a FileSystem.
+ *
+ * @param src src.
+ * @param dstFS dstFs.
+ * @param dst dst.
+ * @param deleteSource delete source.
+ * @param conf configuration.
+ * @throws IOException raised on errors performing I/O.
+ * @return true if the operation succeeded.
+ */
public static boolean copy(File src,
FileSystem dstFS, Path dst,
boolean deleteSource,
@@ -474,7 +555,17 @@ public static boolean copy(File src,
}
}
- /** Copy FileSystem files to local files. */
+ /**
+ * Copy FileSystem files to local files.
+ *
+ * @param srcFS srcFs.
+ * @param src src.
+ * @param dst dst.
+ * @param deleteSource delete source.
+ * @param conf configuration.
+ * @throws IOException raised on errors performing I/O.
+ * @return true if the operation succeeded.
+ */
public static boolean copy(FileSystem srcFS, Path src,
File dst, boolean deleteSource,
Configuration conf) throws IOException {
@@ -498,7 +589,11 @@ private static boolean copy(FileSystem srcFS, FileStatus srcStatus,
deleteSource, conf);
}
} else {
- InputStream in = srcFS.open(src);
+ InputStream in = awaitFuture(srcFS.openFile(src)
+ .withFileStatus(srcStatus)
+ .opt(FS_OPTION_OPENFILE_READ_POLICY,
+ FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE)
+ .build());
IOUtils.copyBytes(in, Files.newOutputStream(dst.toPath()), conf);
}
if (deleteSource) {
@@ -533,6 +628,26 @@ private static Path checkDest(String srcName, FileSystem dstFS, Path dst,
return dst;
}
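+  /**
+   * Check if the file is regular (symbolic links are followed).
+   * @param file The file being checked.
+   * @return Returns the result of checking whether the file is a regular file.
+   */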
+ public static boolean isRegularFile(File file) {
+ return isRegularFile(file, true);
+ }
+
+ /**
+ * Check if the file is regular.
+ * @param file The file being checked.
+ * @param allowLinks Whether to allow matching links.
+ * @return Returns the result of checking whether the file is a regular file.
+ */
+ public static boolean isRegularFile(File file, boolean allowLinks) {
+ if (file != null) {
+ if (allowLinks) {
+ return Files.isRegularFile(file.toPath());
+ }
+ return Files.isRegularFile(file.toPath(), LinkOption.NOFOLLOW_LINKS);
+ }
+ return true;
+ }
+
/**
* Convert a os-native filename to a path that works for the shell.
* @param filename The filename to convert
@@ -622,12 +737,12 @@ public static long getDU(File dir) {
*/
public static void unZip(InputStream inputStream, File toDir)
throws IOException {
- try (ZipInputStream zip = new ZipInputStream(inputStream)) {
+ try (ZipArchiveInputStream zip = new ZipArchiveInputStream(inputStream)) {
int numOfFailedLastModifiedSet = 0;
String targetDirPath = toDir.getCanonicalPath() + File.separator;
- for(ZipEntry entry = zip.getNextEntry();
+ for(ZipArchiveEntry entry = zip.getNextZipEntry();
entry != null;
- entry = zip.getNextEntry()) {
+ entry = zip.getNextZipEntry()) {
if (!entry.isDirectory()) {
File file = new File(toDir, entry.getName());
if (!file.getCanonicalPath().startsWith(targetDirPath)) {
@@ -646,6 +761,9 @@ public static void unZip(InputStream inputStream, File toDir)
if (!file.setLastModified(entry.getTime())) {
numOfFailedLastModifiedSet++;
}
+ if (entry.getPlatform() == ZipArchiveEntry.PLATFORM_UNIX) {
+ Files.setPosixFilePermissions(file.toPath(), permissionsFromMode(entry.getUnixMode()));
+ }
}
}
if (numOfFailedLastModifiedSet > 0) {
@@ -655,6 +773,49 @@ public static void unZip(InputStream inputStream, File toDir)
}
}
+ /**
+ * Convert a numeric Unix permission mode into the equivalent set of POSIX
+ * permissions for owner, group, and others. Special bits such as setuid are
+ * not represented; only the read/write/execute bits are mapped.
+ * @param mode permissions represented as a numeric value
+ * @return the file's original permissions as a permission set
+ */
+ private static Set<PosixFilePermission> permissionsFromMode(int mode) {
+   EnumSet<PosixFilePermission> permissions =
+       EnumSet.noneOf(PosixFilePermission.class);
+ addPermissions(permissions, mode, PosixFilePermission.OTHERS_READ,
+ PosixFilePermission.OTHERS_WRITE, PosixFilePermission.OTHERS_EXECUTE);
+ addPermissions(permissions, mode >> 3, PosixFilePermission.GROUP_READ,
+ PosixFilePermission.GROUP_WRITE, PosixFilePermission.GROUP_EXECUTE);
+ addPermissions(permissions, mode >> 6, PosixFilePermission.OWNER_READ,
+ PosixFilePermission.OWNER_WRITE, PosixFilePermission.OWNER_EXECUTE);
+ return permissions;
+ }
+
+ /**
+ * Add the read/write/execute permissions encoded in the low three bits of
+ * mode to the given permission set.
+ * @param permissions the permission set being built up
+ * @param mode numeric permission bits; only the lowest three bits are examined
+ * @param r Read permission
+ * @param w Write permission
+ * @param x Execute permission
+ */
+ private static void addPermissions(
+ Set<PosixFilePermission> permissions,
+ int mode,
+ PosixFilePermission r,
+ PosixFilePermission w,
+ PosixFilePermission x) {
+ if ((mode & 1L) == 1L) {
+ permissions.add(x);
+ }
+ if ((mode & 2L) == 2L) {
+ permissions.add(w);
+ }
+ if ((mode & 4L) == 4L) {
+ permissions.add(r);
+ }
+ }
+
/**
* Given a File input it will unzip it in the unzip directory.
* passed as the second parameter
@@ -663,14 +824,14 @@ public static void unZip(InputStream inputStream, File toDir)
* @throws IOException An I/O exception has occurred
*/
public static void unZip(File inFile, File unzipDir) throws IOException {
- Enumeration<? extends ZipEntry> entries;
+ Enumeration<? extends ZipArchiveEntry> entries;
ZipFile zipFile = new ZipFile(inFile);
try {
- entries = zipFile.entries();
+ entries = zipFile.getEntries();
String targetDirPath = unzipDir.getCanonicalPath() + File.separator;
while (entries.hasMoreElements()) {
- ZipEntry entry = entries.nextElement();
+ ZipArchiveEntry entry = entries.nextElement();
if (!entry.isDirectory()) {
InputStream in = zipFile.getInputStream(entry);
try {
@@ -695,6 +856,9 @@ public static void unZip(File inFile, File unzipDir) throws IOException {
} finally {
out.close();
}
+ if (entry.getPlatform() == ZipArchiveEntry.PLATFORM_UNIX) {
+ Files.setPosixFilePermissions(file.toPath(), permissionsFromMode(entry.getUnixMode()));
+ }
} finally {
in.close();
}
@@ -845,7 +1009,7 @@ public static void unTar(InputStream inputStream, File untarDir,
*
* @param inFile The tar file as input.
* @param untarDir The untar directory where to untar the tar file.
- * @throws IOException
+ * @throws IOException an exception occurred.
*/
public static void unTar(File inFile, File untarDir) throws IOException {
if (!untarDir.mkdirs()) {
@@ -888,10 +1052,13 @@ private static void unTarUsingTar(InputStream inputStream, File untarDir,
private static void unTarUsingTar(File inFile, File untarDir,
boolean gzipped) throws IOException {
StringBuffer untarCommand = new StringBuffer();
+ // not using canonical path here; this postpones relative path
+ // resolution until bash is executed.
+ final String source = "'" + FileUtil.makeSecureShellPath(inFile) + "'";
if (gzipped) {
- untarCommand.append(" gzip -dc '")
- .append(FileUtil.makeSecureShellPath(inFile))
- .append("' | (");
+ untarCommand.append(" gzip -dc ")
+ .append(source)
+ .append(" | (");
}
untarCommand.append("cd '")
.append(FileUtil.makeSecureShellPath(untarDir))
@@ -901,15 +1068,17 @@ private static void unTarUsingTar(File inFile, File untarDir,
if (gzipped) {
untarCommand.append(" -)");
} else {
- untarCommand.append(FileUtil.makeSecureShellPath(inFile));
+ untarCommand.append(source);
}
+ LOG.debug("executing [{}]", untarCommand);
String[] shellCmd = { "bash", "-c", untarCommand.toString() };
ShellCommandExecutor shexec = new ShellCommandExecutor(shellCmd);
shexec.execute();
int exitcode = shexec.getExitCode();
if (exitcode != 0) {
throw new IOException("Error untarring file " + inFile +
- ". Tar process exited with exit code " + exitcode);
+ ". Tar process exited with exit code " + exitcode
+ + " from command " + untarCommand);
}
}
@@ -966,6 +1135,14 @@ private static void unpackEntries(TarArchiveInputStream tis,
+ " would create entry outside of " + outputDir);
}
+ if (entry.isSymbolicLink() || entry.isLink()) {
+ String canonicalTargetPath = getCanonicalPath(entry.getLinkName(), outputDir);
+ if (!canonicalTargetPath.startsWith(targetDirPath)) {
+ throw new IOException(
+ "expanding " + entry.getName() + " would create entry outside of " + outputDir);
+ }
+ }
+
if (entry.isDirectory()) {
File subDir = new File(outputDir, entry.getName());
if (!subDir.mkdirs() && !subDir.isDirectory()) {
@@ -981,10 +1158,12 @@ private static void unpackEntries(TarArchiveInputStream tis,
}
if (entry.isSymbolicLink()) {
- // Create symbolic link relative to tar parent dir
- Files.createSymbolicLink(FileSystems.getDefault()
- .getPath(outputDir.getPath(), entry.getName()),
- FileSystems.getDefault().getPath(entry.getLinkName()));
+ // Create symlink with canonical target path to ensure that we don't extract
+ // outside targetDirPath
+ String canonicalTargetPath = getCanonicalPath(entry.getLinkName(), outputDir);
+ Files.createSymbolicLink(
+ FileSystems.getDefault().getPath(outputDir.getPath(), entry.getName()),
+ FileSystems.getDefault().getPath(canonicalTargetPath));
return;
}
@@ -996,7 +1175,8 @@ private static void unpackEntries(TarArchiveInputStream tis,
}
if (entry.isLink()) {
- File src = new File(outputDir, entry.getLinkName());
+ String canonicalTargetPath = getCanonicalPath(entry.getLinkName(), outputDir);
+ File src = new File(canonicalTargetPath);
HardLink.createHardLink(src, outputFile);
return;
}
@@ -1004,6 +1184,20 @@ private static void unpackEntries(TarArchiveInputStream tis,
org.apache.commons.io.FileUtils.copyToFile(tis, outputFile);
}
+ /**
+ * Gets the canonical path for the given path.
+ *
+ * @param path The path for which the canonical path needs to be computed.
+ * @param parentDir The parent directory to use if the path is a relative path.
+ * @return The canonical path of the given path.
+ */
+ private static String getCanonicalPath(String path, File parentDir) throws IOException {
+ java.nio.file.Path targetPath = Paths.get(path);
+ return (targetPath.isAbsolute() ?
+ new File(path) :
+ new File(parentDir, path)).getCanonicalPath();
+ }
+
/**
* Class for creating hardlinks.
* Supports Unix, WindXP.
@@ -1026,6 +1220,7 @@ public static class HardLink extends org.apache.hadoop.fs.HardLink {
* @param target the target for symlink
* @param linkname the symlink
* @return 0 on success
+ * @throws IOException raised on errors performing I/O.
*/
public static int symLink(String target, String linkname) throws IOException{
@@ -1087,8 +1282,8 @@ public static int symLink(String target, String linkname) throws IOException{
* @param filename the name of the file to change
* @param perm the permission string
* @return the exit code from the command
- * @throws IOException
- * @throws InterruptedException
+ * @throws IOException raised on errors performing I/O.
+ * @throws InterruptedException command interrupted.
*/
public static int chmod(String filename, String perm
) throws IOException, InterruptedException {
@@ -1102,7 +1297,7 @@ public static int chmod(String filename, String perm
* @param perm permission string
* @param recursive true, if permissions should be changed recursively
* @return the exit code from the command.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public static int chmod(String filename, String perm, boolean recursive)
throws IOException {
@@ -1128,7 +1323,7 @@ public static int chmod(String filename, String perm, boolean recursive)
* @param file the file to change
* @param username the new user owner name
* @param groupname the new group owner name
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public static void setOwner(File file, String username,
String groupname) throws IOException {
@@ -1145,7 +1340,7 @@ public static void setOwner(File file, String username,
* Platform independent implementation for {@link File#setReadable(boolean)}
* File#setReadable does not work as expected on Windows.
* @param f input file
- * @param readable
+ * @param readable readable.
* @return true on success, false otherwise
*/
public static boolean setReadable(File f, boolean readable) {
@@ -1166,7 +1361,7 @@ public static boolean setReadable(File f, boolean readable) {
* Platform independent implementation for {@link File#setWritable(boolean)}
* File#setWritable does not work as expected on Windows.
* @param f input file
- * @param writable
+ * @param writable writable.
* @return true on success, false otherwise
*/
public static boolean setWritable(File f, boolean writable) {
@@ -1190,7 +1385,7 @@ public static boolean setWritable(File f, boolean writable) {
* behavior on Windows as on Unix platforms. Creating, deleting or renaming
* a file within that folder will still succeed on Windows.
* @param f input file
- * @param executable
+ * @param executable executable.
* @return true on success, false otherwise
*/
public static boolean setExecutable(File f, boolean executable) {
@@ -1269,7 +1464,7 @@ public static boolean canExecute(File f) {
* of forking if group == other.
* @param f the file to change
* @param permission the new permissions
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public static void setPermission(File f, FsPermission permission
) throws IOException {
@@ -1574,6 +1769,7 @@ public static List getJarsInDirectory(String path) {
* wildcard path to return all jars from the directory to use in a classpath.
*
* @param path the path to the directory. The path may include the wildcard.
+ * @param useLocal use local.
* @return the list of jars as URLs, or an empty list if there are no jars, or
* the directory does not exist
*/
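Editor's illustration (not part of the patch): what the new permissionsFromMode() mapping produces for a typical Unix mode, and how unZip() then applies it. The extracted file name is hypothetical; Set, EnumSet, PosixFilePermission and Files are the java.util/java.nio classes this patch already imports.

    // For mode 0755 (rwxr-xr-x) the private helper yields this set, which
    // unZip() applies to the extracted entry:
    Set<PosixFilePermission> perms = EnumSet.of(
        PosixFilePermission.OWNER_READ, PosixFilePermission.OWNER_WRITE,
        PosixFilePermission.OWNER_EXECUTE,
        PosixFilePermission.GROUP_READ, PosixFilePermission.GROUP_EXECUTE,
        PosixFilePermission.OTHERS_READ, PosixFilePermission.OTHERS_EXECUTE);
    Files.setPosixFilePermissions(extractedFile.toPath(), perms);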
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java
index 607aa263622f6..cdbe51e330701 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FilterFileSystem.java
@@ -233,7 +233,7 @@ public FSDataOutputStream createNonRecursive(Path f, FsPermission permission,
*
* @param src file name
* @param replication new replication
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
* @return true if successful;
* false if file does not exist or is a directory
*/
@@ -304,7 +304,7 @@ public Path getHomeDirectory() {
* Set the current working directory for the given file system. All relative
* paths will be resolved relative to it.
*
- * @param newDir
+ * @param newDir new dir.
*/
@Override
public void setWorkingDirectory(Path newDir) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java
index 7275b70227f99..73258661ec191 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsShell.java
@@ -130,7 +130,7 @@ public Path getCurrentTrashDir() throws IOException {
* Returns the current trash location for the path specified
* @param path to be deleted
* @return path to the trash
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public Path getCurrentTrashDir(Path path) throws IOException {
return getTrash().getCurrentTrashDir(path);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsStatus.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsStatus.java
index d392c7d765d72..c4bc341bf4f7c 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsStatus.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FsStatus.java
@@ -35,24 +35,39 @@ public class FsStatus implements Writable {
private long used;
private long remaining;
- /** Construct a FsStatus object, using the specified statistics */
+ /**
+ * Construct a FsStatus object, using the specified statistics.
+ *
+ * @param capacity capacity.
+ * @param used used.
+ * @param remaining remaining.
+ */
public FsStatus(long capacity, long used, long remaining) {
this.capacity = capacity;
this.used = used;
this.remaining = remaining;
}
- /** Return the capacity in bytes of the file system */
+ /**
+ * Return the capacity in bytes of the file system.
+ * @return capacity.
+ */
public long getCapacity() {
return capacity;
}
- /** Return the number of bytes used on the file system */
+ /**
+ * Return the number of bytes used on the file system.
+ * @return used.
+ */
public long getUsed() {
return used;
}
- /** Return the number of remaining bytes on the file system */
+ /**
+ * Return the number of remaining bytes on the file system.
+ * @return remaining.
+ */
public long getRemaining() {
return remaining;
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FutureDataInputStreamBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FutureDataInputStreamBuilder.java
index 27a522e593001..e7f441a75d3c8 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FutureDataInputStreamBuilder.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/FutureDataInputStreamBuilder.java
@@ -17,6 +17,7 @@
*/
package org.apache.hadoop.fs;
+import javax.annotation.Nullable;
import java.io.IOException;
import java.util.concurrent.CompletableFuture;
@@ -34,7 +35,7 @@
* options accordingly, for example:
*
* If the option is not related to the file system, the option will be ignored.
- * If the option is must, but not supported by the file system, a
+ * If the option is must, but not supported/known by the file system, an
* {@link IllegalArgumentException} will be thrown.
*
*/
@@ -51,10 +52,11 @@ CompletableFuture build()
/**
* A FileStatus may be provided to the open request.
* It is up to the implementation whether to use this or not.
- * @param status status.
+ * @param status status: may be null
* @return the builder.
*/
- default FutureDataInputStreamBuilder withFileStatus(FileStatus status) {
+ default FutureDataInputStreamBuilder withFileStatus(
+ @Nullable FileStatus status) {
return this;
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobExpander.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobExpander.java
index cb430ed3f6251..c87444c6c87f7 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobExpander.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobExpander.java
@@ -56,9 +56,9 @@ public StringWithOffset(String string, int offset) {
* {a,b}/{c/\d} - {a,b}/c/d
*
*
- * @param filePattern
+ * @param filePattern file pattern.
* @return expanded file patterns
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public static List<String> expand(String filePattern) throws IOException {
  List<String> fullyExpanded = new ArrayList<String>();
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobalStorageStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobalStorageStatistics.java
index 30ce07a422e6e..d94339034447a 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobalStorageStatistics.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/GlobalStorageStatistics.java
@@ -104,6 +104,8 @@ public synchronized void reset() {
/**
* Get an iterator that we can use to iterate throw all the global storage
* statistics objects.
+ *
+ * @return StorageStatistics Iterator.
*/
synchronized public Iterator<StorageStatistics> iterator() {
  Entry<String, StorageStatistics> first = map.firstEntry();
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java
index 7e12d0a11e953..1d64b0bcbe921 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HarFileSystem.java
@@ -463,7 +463,7 @@ static BlockLocation[] fixBlockLocations(BlockLocation[] locations,
* @param start the start of the desired range in the contained file
* @param len the length of the desired range
* @return block locations for this segment of file
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
@Override
public BlockLocation[] getFileBlockLocations(FileStatus file, long start,
@@ -525,7 +525,7 @@ private void fileStatusesInIndex(HarStatus parent, List statuses)
* Combine the status stored in the index and the underlying status.
* @param h status stored in the index
* @return the combined file status
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
private FileStatus toFileStatus(HarStatus h) throws IOException {
final Path p = h.isDir ? archivePath : new Path(archivePath, h.partName);
@@ -635,7 +635,7 @@ public long getModificationTime() {
* while creating a hadoop archive.
* @param f the path in har filesystem
* @return filestatus.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
@Override
public FileStatus getFileStatus(Path f) throws IOException {
@@ -1104,7 +1104,7 @@ public void setDropBehind(Boolean dropBehind) throws IOException {
* @param start the start position in the part file
* @param length the length of valid data in the part file
* @param bufsize the buffer size
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public HarFSDataInputStream(FileSystem fs, Path p, long start,
long length, int bufsize) throws IOException {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java
index 855fbb04e59b4..1624c5d395aec 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HardLink.java
@@ -156,6 +156,7 @@ String[] linkCount(File file) throws IOException {
* Creates a hardlink.
* @param file - existing source file
* @param linkName - desired target link file
+ * @throws IOException raised on errors performing I/O.
*/
public static void createHardLink(File file, File linkName)
throws IOException {
@@ -177,6 +178,7 @@ public static void createHardLink(File file, File linkName)
* @param fileBaseNames - list of path-less file names, as returned by
* parentDir.list()
* @param linkDir - where the hardlinks should be put. It must already exist.
+ * @throws IOException raised on errors performing I/O.
*/
public static void createHardLinkMult(File parentDir, String[] fileBaseNames,
File linkDir) throws IOException {
@@ -204,6 +206,10 @@ public static void createHardLinkMult(File parentDir, String[] fileBaseNames,
/**
* Retrieves the number of links to the specified file.
+ *
+ * @param fileName file name.
+ * @throws IOException raised on errors performing I/O.
+ * @return link count.
*/
public static int getLinkCount(File fileName) throws IOException {
if (fileName == null) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HasFileDescriptor.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HasFileDescriptor.java
index bcf325ceca5df..a0e89d6aeac44 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HasFileDescriptor.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/HasFileDescriptor.java
@@ -33,7 +33,7 @@ public interface HasFileDescriptor {
/**
* @return the FileDescriptor
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public FileDescriptor getFileDescriptor() throws IOException;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
index 5f266a7b82555..774e015b37343 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalDirAllocator.java
@@ -78,8 +78,9 @@ public class LocalDirAllocator {
private final DiskValidator diskValidator;
- /**Create an allocator object
- * @param contextCfgItemName
+ /**
+ * Create an allocator object.
+ * @param contextCfgItemName contextCfgItemName.
*/
public LocalDirAllocator(String contextCfgItemName) {
this.contextCfgItemName = contextCfgItemName;
@@ -123,7 +124,7 @@ private AllocatorPerContext obtainContext(String contextCfgItemName) {
* available disk)
* @param conf the Configuration object
* @return the complete path to the file on a local disk
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public Path getLocalPathForWrite(String pathStr,
Configuration conf) throws IOException {
@@ -139,7 +140,7 @@ public Path getLocalPathForWrite(String pathStr,
* @param size the size of the file that is going to be written
* @param conf the Configuration object
* @return the complete path to the file on a local disk
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public Path getLocalPathForWrite(String pathStr, long size,
Configuration conf) throws IOException {
@@ -156,7 +157,7 @@ public Path getLocalPathForWrite(String pathStr, long size,
* @param conf the Configuration object
* @param checkWrite ensure that the path is writable
* @return the complete path to the file on a local disk
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public Path getLocalPathForWrite(String pathStr, long size,
Configuration conf,
@@ -171,7 +172,7 @@ public Path getLocalPathForWrite(String pathStr, long size,
* @param pathStr the requested file (this will be searched)
* @param conf the Configuration object
* @return the complete path to the file on a local disk
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public Path getLocalPathToRead(String pathStr,
Configuration conf) throws IOException {
@@ -184,7 +185,7 @@ public Path getLocalPathToRead(String pathStr,
* @param pathStr the path underneath the roots
* @param conf the configuration to look up the roots in
* @return all of the paths that exist under any of the roots
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public Iterable<Path> getAllLocalPathsToRead(String pathStr,
Configuration conf
@@ -205,7 +206,7 @@ public Iterable getAllLocalPathsToRead(String pathStr,
* @param size the size of the file that is going to be written
* @param conf the Configuration object
* @return a unique temporary file
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public File createTmpFileForWrite(String pathStr, long size,
Configuration conf) throws IOException {
@@ -213,8 +214,9 @@ public File createTmpFileForWrite(String pathStr, long size,
return context.createTmpFileForWrite(pathStr, size, conf);
}
- /** Method to check whether a context is valid
- * @param contextCfgItemName
+ /**
+ * Method to check whether a context is valid.
+ * @param contextCfgItemName contextCfgItemName.
* @return true/false
*/
public static boolean isContextValid(String contextCfgItemName) {
@@ -224,9 +226,9 @@ public static boolean isContextValid(String contextCfgItemName) {
}
/**
- * Removes the context from the context config items
+ * Removes the context from the context config items.
*
- * @param contextCfgItemName
+ * @param contextCfgItemName contextCfgItemName.
*/
@Deprecated
@InterfaceAudience.LimitedPrivate({"MapReduce"})
@@ -236,8 +238,9 @@ public static void removeContext(String contextCfgItemName) {
}
}
- /** We search through all the configured dirs for the file's existence
- * and return true when we find
+ /**
+ * We search through all the configured dirs for the file's existence
+ * and return true when we find one.
* @param pathStr the requested file (this will be searched)
* @param conf the Configuration object
* @return true if files exist. false otherwise
@@ -393,6 +396,10 @@ public Path getLocalPathForWrite(String pathStr, long size,
Context ctx = confChanged(conf);
int numDirs = ctx.localDirs.length;
int numDirsSearched = 0;
+ // Max capacity in any directory
+ long maxCapacity = 0;
+ String errorText = null;
+ IOException diskException = null;
//remove the leading slash from the path (to make sure that the uri
//resolution results in a valid path on the dir being checked)
if (pathStr.startsWith("/")) {
@@ -441,9 +448,18 @@ public Path getLocalPathForWrite(String pathStr, long size,
int dirNum = ctx.getAndIncrDirNumLastAccessed(randomInc);
while (numDirsSearched < numDirs) {
long capacity = ctx.dirDF[dirNum].getAvailable();
+ if (capacity > maxCapacity) {
+ maxCapacity = capacity;
+ }
if (capacity > size) {
- returnPath =
- createPath(ctx.localDirs[dirNum], pathStr, checkWrite);
+ try {
+ returnPath = createPath(ctx.localDirs[dirNum], pathStr,
+ checkWrite);
+ } catch (IOException e) {
+ errorText = e.getMessage();
+ diskException = e;
+ LOG.debug("DiskException caught for dir {}", ctx.localDirs[dirNum], e);
+ }
if (returnPath != null) {
ctx.getAndIncrDirNumLastAccessed(numDirsSearched);
break;
@@ -459,8 +475,13 @@ public Path getLocalPathForWrite(String pathStr, long size,
}
//no path found
- throw new DiskErrorException("Could not find any valid local " +
- "directory for " + pathStr);
+ String newErrorText = "Could not find any valid local directory for " +
+ pathStr + " with requested size " + size +
+ " as the max capacity in any directory is " + maxCapacity;
+ if (errorText != null) {
+ newErrorText = newErrorText + " due to " + errorText;
+ }
+ throw new DiskErrorException(newErrorText, diskException);
}
/** Creates a file on the local FS. Pass size as
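Editor's illustration (not part of the patch): what a caller sees with the richer DiskErrorException raised above. The configuration key, sizes, and LOG/conf variables are assumptions for illustration only.

    // Ask for 1 GiB of scratch space; with this change a failed allocation now
    // reports the requested size, the largest free capacity seen, and any
    // underlying disk exception.
    LocalDirAllocator allocator = new LocalDirAllocator("mapreduce.cluster.local.dir");
    try {
      Path scratch = allocator.getLocalPathForWrite("spill/part-0.out", 1L << 30, conf);
    } catch (DiskErrorException e) {
      // e.g. "Could not find any valid local directory for spill/part-0.out with
      //       requested size 1073741824 as the max capacity in any directory is ..."
      LOG.error("No local directory has enough space", e);
    }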
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalFileSystem.java
index c41190a7b360b..590cbd9a49ece 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/LocalFileSystem.java
@@ -71,7 +71,11 @@ public LocalFileSystem(FileSystem rawLocalFileSystem) {
super(rawLocalFileSystem);
}
- /** Convert a path to a File. */
+ /**
+ * Convert a path to a File.
+ * @param path the path.
+ * @return file.
+ */
public File pathToFile(Path path) {
return ((RawLocalFileSystem)fs).pathToFile(path);
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java
index 5a4a6a97cc4f7..354e4a6b4657d 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32CastagnoliFileChecksum.java
@@ -28,7 +28,13 @@ public MD5MD5CRC32CastagnoliFileChecksum() {
this(0, 0, null);
}
- /** Create a MD5FileChecksum */
+ /**
+ * Create a MD5FileChecksum.
+ *
+ * @param bytesPerCRC bytesPerCRC.
+ * @param crcPerBlock crcPerBlock.
+ * @param md5 md5.
+ */
public MD5MD5CRC32CastagnoliFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
super(bytesPerCRC, crcPerBlock, md5);
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java
index 3fdb7e982621c..c5ac381f78238 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32FileChecksum.java
@@ -44,7 +44,13 @@ public MD5MD5CRC32FileChecksum() {
this(0, 0, null);
}
- /** Create a MD5FileChecksum */
+ /**
+ * Create a MD5FileChecksum.
+ *
+ * @param bytesPerCRC bytesPerCRC.
+ * @param crcPerBlock crcPerBlock.
+ * @param md5 md5.
+ */
public MD5MD5CRC32FileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
this.bytesPerCRC = bytesPerCRC;
this.crcPerBlock = crcPerBlock;
@@ -76,7 +82,10 @@ public byte[] getBytes() {
return WritableUtils.toByteArray(this);
}
- /** returns the CRC type */
+ /**
+ * Returns the CRC type.
+ * @return the data checksum type.
+ */
public DataChecksum.Type getCrcType() {
// default to the one that is understood by all releases.
return DataChecksum.Type.CRC32;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java
index 5164d0200d28d..f7996c8623717 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MD5MD5CRC32GzipFileChecksum.java
@@ -28,7 +28,13 @@ public MD5MD5CRC32GzipFileChecksum() {
this(0, 0, null);
}
- /** Create a MD5FileChecksum */
+ /**
+ * Create a MD5FileChecksum.
+ *
+ * @param bytesPerCRC bytesPerCRC.
+ * @param crcPerBlock crcPerBlock.
+ * @param md5 md5.
+ */
public MD5MD5CRC32GzipFileChecksum(int bytesPerCRC, long crcPerBlock, MD5Hash md5) {
super(bytesPerCRC, crcPerBlock, md5);
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploader.java
index dcb76b50b3429..5e4eda26c7f1d 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploader.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploader.java
@@ -31,10 +31,11 @@
/**
* MultipartUploader is an interface for copying files multipart and across
* multiple nodes.
- *
+ *
* The interface extends {@link IOStatisticsSource} so that there is no
* need to cast an instance to see if it is a source of statistics.
* However, implementations MAY return null for their actual statistics.
+ *
*/
@InterfaceAudience.Public
@InterfaceStability.Unstable
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploaderBuilder.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploaderBuilder.java
index 381bfaa07f6d1..e7b0865063ee5 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploaderBuilder.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/MultipartUploaderBuilder.java
@@ -25,34 +25,43 @@
/**
* Builder interface for Multipart readers.
- * @param <S>
- * @param <B>
+ * @param <S> MultipartUploader Generic Type.
+ * @param <B> MultipartUploaderBuilder Generic Type.
*/
public interface MultipartUploaderBuilder<S extends MultipartUploader, B extends MultipartUploaderBuilder<S, B>>
extends FSBuilder<S, B> {
/**
* Set permission for the file.
+ * @param perm permission.
+ * @return B Generics Type.
*/
B permission(@Nonnull FsPermission perm);
/**
* Set the size of the buffer to be used.
+ * @param bufSize buffer size.
+ * @return B Generics Type.
*/
B bufferSize(int bufSize);
/**
* Set replication factor.
+ * @param replica replica.
+ * @return B Generics Type.
*/
B replication(short replica);
/**
* Set block size.
+ * @param blkSize blkSize.
+ * @return B Generics Type.
*/
B blockSize(long blkSize);
/**
* Create an FSDataOutputStream at the specified path.
+ * @return B Generics Type.
*/
B create();
@@ -60,16 +69,21 @@ public interface MultipartUploaderBuilder FS_OPTION_OPENFILE_STANDARD_OPTIONS =
+ Collections.unmodifiableSet(Stream.of(
+ FS_OPTION_OPENFILE_BUFFER_SIZE,
+ FS_OPTION_OPENFILE_READ_POLICY,
+ FS_OPTION_OPENFILE_LENGTH,
+ FS_OPTION_OPENFILE_SPLIT_START,
+ FS_OPTION_OPENFILE_SPLIT_END)
+ .collect(Collectors.toSet()));
+
+ /**
+ * Read policy for adaptive IO: {@value}.
+ */
+ public static final String FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE =
+ "adaptive";
+
+ /**
+ * Read policy {@value} - whatever the implementation does by default.
+ */
+ public static final String FS_OPTION_OPENFILE_READ_POLICY_DEFAULT =
+ "default";
+
+ /**
+ * Read policy for random IO: {@value}.
+ */
+ public static final String FS_OPTION_OPENFILE_READ_POLICY_RANDOM =
+ "random";
+
+ /**
+ * Read policy for sequential IO: {@value}.
+ */
+ public static final String FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL =
+ "sequential";
+
+ /**
+ * Vectored IO API to be used: {@value}.
+ */
+ public static final String FS_OPTION_OPENFILE_READ_POLICY_VECTOR =
+ "vector";
+
+ /**
+ * Whole file to be read, end-to-end: {@value}.
+ */
+ public static final String FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE =
+ "whole-file";
+
+ /**
+ * All the current read policies as a set.
+ */
+ public static final Set<String> FS_OPTION_OPENFILE_READ_POLICIES =
+ Collections.unmodifiableSet(Stream.of(
+ FS_OPTION_OPENFILE_READ_POLICY_ADAPTIVE,
+ FS_OPTION_OPENFILE_READ_POLICY_DEFAULT,
+ FS_OPTION_OPENFILE_READ_POLICY_RANDOM,
+ FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL,
+ FS_OPTION_OPENFILE_READ_POLICY_VECTOR,
+ FS_OPTION_OPENFILE_READ_POLICY_WHOLE_FILE)
+ .collect(Collectors.toSet()));
+
+ }
}
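
A short usage sketch for the read-policy options above, going through FileSystem.openFile(); the file path is hypothetical and the store is assumed to follow the standard openFile() contract.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY;
    import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_RANDOM;

    public class OpenFileReadPolicyExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("file:///tmp/example.dat");   // hypothetical file
        FileSystem fs = path.getFileSystem(conf);
        // Declare the intended access pattern; filesystems that do not
        // understand the option ignore it because it is set with opt().
        try (FSDataInputStream in = fs.openFile(path)
            .opt(FS_OPTION_OPENFILE_READ_POLICY, FS_OPTION_OPENFILE_READ_POLICY_RANDOM)
            .build().get()) {
          byte[] buf = new byte[16];
          in.readFully(0, buf);
        }
      }
    }
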
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java
index deb3880ee4195..f32f2a93544bf 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PathIOException.java
@@ -64,7 +64,13 @@ public PathIOException(String path, String error) {
this.path = path;
}
- protected PathIOException(String path, String error, Throwable cause) {
+ /**
+ * Use a subclass of PathIOException if possible.
+ * @param path for the exception
+ * @param error custom string to use as the error text
+ * @param cause cause of exception.
+ */
+ public PathIOException(String path, String error, Throwable cause) {
super(error, cause);
this.path = path;
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java
index 6744d17a72666..7380402eb6156 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/PositionedReadable.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -17,7 +17,11 @@
*/
package org.apache.hadoop.fs;
-import java.io.*;
+import java.io.EOFException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.List;
+import java.util.function.IntFunction;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@@ -85,4 +89,47 @@ void readFully(long position, byte[] buffer, int offset, int length)
* the read operation completed
*/
void readFully(long position, byte[] buffer) throws IOException;
+
+ /**
+ * What is the smallest reasonable seek?
+ * @return the minimum number of bytes
+ */
+ default int minSeekForVectorReads() {
+ return 4 * 1024;
+ }
+
+ /**
+ * What is the largest size that we should group ranges together as?
+ * @return the number of bytes to read at once
+ */
+ default int maxReadSizeForVectorReads() {
+ return 1024 * 1024;
+ }
+
+ /**
+ * Read fully a list of file ranges asynchronously from this file.
+ * The default iterates through the ranges to read each synchronously, but
+ * the intent is that FSDataInputStream subclasses can make more efficient
+ * readers.
+ * As a result of the call, each range will have FileRange.setData(CompletableFuture)
+ * called with a future that when complete will have a ByteBuffer with the
+ * data from the file's range.
+ *
+ * The position returned by getPos() after readVectored() is undefined.
+ *
+ *
+ * If a file is changed while the readVectored() operation is in progress, the output is
+ * undefined. Some ranges may have old data, some may have new and some may have both.
+ *
+ *
+ * While a readVectored() operation is in progress, normal read api calls may block.
+ *
+ * @param ranges the byte ranges to read
+ * @param allocate the function to allocate ByteBuffer
+ * @throws IOException any IOE.
+ */
+ default void readVectored(List<? extends FileRange> ranges,
+ IntFunction<ByteBuffer> allocate) throws IOException {
+ VectoredReadUtils.readVectored(this, ranges, allocate);
+ }
}
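
A sketch of calling the new vectored read API; it assumes the file exists and is large enough to cover both ranges, and that FileRange.createFileRange() is the factory introduced alongside this interface change.

    import java.nio.ByteBuffer;
    import java.util.Arrays;
    import java.util.List;

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileRange;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    public class VectoredReadExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("file:///tmp/example.dat");   // hypothetical file >= 8 KiB
        FileSystem fs = path.getFileSystem(conf);
        List<FileRange> ranges = Arrays.asList(
            FileRange.createFileRange(0, 1024),        // assumed factory method
            FileRange.createFileRange(4096, 1024));
        try (FSDataInputStream in = fs.open(path)) {
          in.readVectored(ranges, ByteBuffer::allocate);
          for (FileRange range : ranges) {
            ByteBuffer data = range.getData().get();   // each range completes separately
            System.out.println("range @" + range.getOffset()
                + " read " + data.remaining() + " bytes");
          }
        }
      }
    }
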
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/QuotaUsage.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/QuotaUsage.java
index b00a31891c867..b0103562123f4 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/QuotaUsage.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/QuotaUsage.java
@@ -105,7 +105,9 @@ public QuotaUsage build() {
// Make it protected for the deprecated ContentSummary constructor.
protected QuotaUsage() { }
- /** Build the instance based on the builder. */
+ /** Build the instance based on the builder.
+ * @param builder builder.
+ */
protected QuotaUsage(Builder builder) {
this.fileAndDirectoryCount = builder.fileAndDirectoryCount;
this.quota = builder.quota;
@@ -127,37 +129,67 @@ protected void setSpaceQuota(long spaceQuota) {
this.spaceQuota = spaceQuota;
}
- /** Return the directory count. */
+ /**
+ * Return the directory count.
+ *
+ * @return file and directory count.
+ */
public long getFileAndDirectoryCount() {
return fileAndDirectoryCount;
}
- /** Return the directory quota. */
+ /**
+ * Return the directory quota.
+ *
+ * @return quota.
+ */
public long getQuota() {
return quota;
}
- /** Return (disk) space consumed. */
+ /**
+ * Return (disk) space consumed.
+ *
+ * @return space consumed.
+ */
public long getSpaceConsumed() {
return spaceConsumed;
}
- /** Return (disk) space quota. */
+ /**
+ * Return (disk) space quota.
+ *
+ * @return space quota.
+ */
public long getSpaceQuota() {
return spaceQuota;
}
- /** Return storage type quota. */
+ /**
+ * Return storage type quota.
+ *
+ * @param type storage type.
+ * @return type quota.
+ */
public long getTypeQuota(StorageType type) {
return (typeQuota != null) ? typeQuota[type.ordinal()] : -1L;
}
- /** Return storage type consumed. */
+ /**
+ * Return storage type consumed.
+ *
+ * @param type storage type.
+ * @return type consumed.
+ */
public long getTypeConsumed(StorageType type) {
return (typeConsumed != null) ? typeConsumed[type.ordinal()] : 0L;
}
- /** Return true if any storage type quota has been set. */
+ /**
+ * Return true if any storage type quota has been set.
+ *
+ * @return true if any storage type quota has been set; false otherwise.
+ */
public boolean isTypeQuotaSet() {
if (typeQuota != null) {
for (StorageType t : StorageType.getTypesSupportingQuota()) {
@@ -169,7 +201,12 @@ public boolean isTypeQuotaSet() {
return false;
}
- /** Return true if any storage type consumption information is available. */
+ /**
+ * Return true if any storage type consumption information is available.
+ *
+ * @return true if any storage type consumption information
+ * is available; false otherwise.
+ */
public boolean isTypeConsumedAvailable() {
if (typeConsumed != null) {
for (StorageType t : StorageType.getTypesSupportingQuota()) {
@@ -271,12 +308,15 @@ public String toString(boolean hOption) {
return toString(hOption, false, null);
}
- /** Return the string representation of the object in the output format.
+ /**
+ * Return the string representation of the object in the output format.
* if hOption is false file sizes are returned in bytes
* if hOption is true file sizes are returned in human readable
*
* @param hOption a flag indicating if human readable output if to be used
- * @return the string representation of the object
+ * @param tOption type option.
+ * @param types storage types.
+ * @return the string representation of the object.
*/
public String toString(boolean hOption,
boolean tOption, List<StorageType> types) {
@@ -328,7 +368,7 @@ protected String getTypesQuotaUsage(boolean hOption,
/**
* Return the header with the StorageTypes.
*
- * @param storageTypes
+ * @param storageTypes storage types.
* @return storage header string
*/
public static String getStorageTypeHeader(List<StorageType> storageTypes) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java
index edcc4a8b99e77..2f4f93099b5c9 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/RawLocalFileSystem.java
@@ -1,4 +1,4 @@
-/**
+/*
* Licensed to the Apache Software Foundation (ASF) under one
* or more contributor license agreements. See the NOTICE file
* distributed with this work for additional information
@@ -33,8 +33,11 @@
import java.io.FileDescriptor;
import java.net.URI;
import java.nio.ByteBuffer;
+import java.nio.channels.AsynchronousFileChannel;
+import java.nio.channels.CompletionHandler;
import java.nio.file.Files;
import java.nio.file.NoSuchFileException;
+import java.nio.file.StandardOpenOption;
import java.nio.file.attribute.BasicFileAttributes;
import java.nio.file.attribute.BasicFileAttributeView;
import java.nio.file.attribute.FileTime;
@@ -44,6 +47,9 @@
import java.util.Optional;
import java.util.StringTokenizer;
import java.util.concurrent.atomic.AtomicLong;
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import java.util.function.IntFunction;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@@ -51,6 +57,8 @@
import org.apache.hadoop.fs.impl.StoreImplementationUtils;
import org.apache.hadoop.fs.permission.FsPermission;
import org.apache.hadoop.fs.statistics.IOStatistics;
+import org.apache.hadoop.fs.statistics.IOStatisticsAggregator;
+import org.apache.hadoop.fs.statistics.IOStatisticsContext;
import org.apache.hadoop.fs.statistics.IOStatisticsSource;
import org.apache.hadoop.fs.statistics.BufferedIOStatisticsOutputStream;
import org.apache.hadoop.fs.statistics.impl.IOStatisticsStore;
@@ -61,6 +69,7 @@
import org.apache.hadoop.util.StringUtils;
import static org.apache.hadoop.fs.impl.PathCapabilitiesSupport.validatePathCapabilityArgs;
+import static org.apache.hadoop.fs.VectoredReadUtils.sortRanges;
import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_BYTES;
import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_EXCEPTIONS;
import static org.apache.hadoop.fs.statistics.StreamStatisticNames.STREAM_READ_SEEK_OPERATIONS;
@@ -100,7 +109,12 @@ private Path makeAbsolute(Path f) {
}
}
- /** Convert a path to a File. */
+ /**
+ * Convert a path to a File.
+ *
+ * @param path the path.
+ * @return file.
+ */
public File pathToFile(Path path) {
checkPath(path);
if (!path.isAbsolute()) {
@@ -125,7 +139,9 @@ public void initialize(URI uri, Configuration conf) throws IOException {
class LocalFSFileInputStream extends FSInputStream implements
HasFileDescriptor, IOStatisticsSource, StreamCapabilities {
private FileInputStream fis;
+ private final File name;
private long position;
+ private AsynchronousFileChannel asyncChannel = null;
/**
* Minimal set of counters.
@@ -142,10 +158,19 @@ class LocalFSFileInputStream extends FSInputStream implements
/** Reference to the bytes read counter for slightly faster counting. */
private final AtomicLong bytesRead;
+ /**
+ * Thread level IOStatistics aggregator to update in close().
+ */
+ private final IOStatisticsAggregator
+ ioStatisticsAggregator;
+
public LocalFSFileInputStream(Path f) throws IOException {
- fis = new FileInputStream(pathToFile(f));
+ name = pathToFile(f);
+ fis = new FileInputStream(name);
bytesRead = ioStatistics.getCounterReference(
STREAM_READ_BYTES);
+ ioStatisticsAggregator =
+ IOStatisticsContext.getCurrentIOStatisticsContext().getAggregator();
}
@Override
@@ -174,10 +199,20 @@ public boolean seekToNewSource(long targetPos) throws IOException {
@Override
public int available() throws IOException { return fis.available(); }
@Override
- public void close() throws IOException { fis.close(); }
- @Override
public boolean markSupported() { return false; }
-
+
+ @Override
+ public void close() throws IOException {
+ try {
+ fis.close();
+ if (asyncChannel != null) {
+ asyncChannel.close();
+ }
+ } finally {
+ ioStatisticsAggregator.aggregate(ioStatistics);
+ }
+ }
+
@Override
public int read() throws IOException {
try {
@@ -257,6 +292,8 @@ public boolean hasCapability(String capability) {
// new capabilities.
switch (capability.toLowerCase(Locale.ENGLISH)) {
case StreamCapabilities.IOSTATISTICS:
+ case StreamCapabilities.IOSTATISTICS_CONTEXT:
+ case StreamCapabilities.VECTOREDIO:
return true;
default:
return false;
@@ -267,8 +304,89 @@ public boolean hasCapability(String capability) {
public IOStatistics getIOStatistics() {
return ioStatistics;
}
+
+ AsynchronousFileChannel getAsyncChannel() throws IOException {
+ if (asyncChannel == null) {
+ synchronized (this) {
+ asyncChannel = AsynchronousFileChannel.open(name.toPath(),
+ StandardOpenOption.READ);
+ }
+ }
+ return asyncChannel;
+ }
+
+ @Override
+ public void readVectored(List<? extends FileRange> ranges,
+ IntFunction<ByteBuffer> allocate) throws IOException {
+
+ List<? extends FileRange> sortedRanges = Arrays.asList(sortRanges(ranges));
+ // Set up all of the futures, so that we can use them if things fail
+ for(FileRange range: sortedRanges) {
+ VectoredReadUtils.validateRangeRequest(range);
+ range.setData(new CompletableFuture<>());
+ }
+ try {
+ AsynchronousFileChannel channel = getAsyncChannel();
+ ByteBuffer[] buffers = new ByteBuffer[sortedRanges.size()];
+ AsyncHandler asyncHandler = new AsyncHandler(channel, sortedRanges, buffers);
+ for(int i = 0; i < sortedRanges.size(); ++i) {
+ FileRange range = sortedRanges.get(i);
+ buffers[i] = allocate.apply(range.getLength());
+ channel.read(buffers[i], range.getOffset(), i, asyncHandler);
+ }
+ } catch (IOException ioe) {
+ LOG.debug("Exception occurred during vectored read ", ioe);
+ for(FileRange range: sortedRanges) {
+ range.getData().completeExceptionally(ioe);
+ }
+ }
+ }
}
-
+
+ /**
+ * A CompletionHandler that implements readFully and translates back
+ * into the form of CompletionHandler that our users expect.
+ */
+ static class AsyncHandler implements CompletionHandler<Integer, Integer> {
+ private final AsynchronousFileChannel channel;
+ private final List<? extends FileRange> ranges;
+ private final ByteBuffer[] buffers;
+
+ AsyncHandler(AsynchronousFileChannel channel,
+ List<? extends FileRange> ranges,
+ ByteBuffer[] buffers) {
+ this.channel = channel;
+ this.ranges = ranges;
+ this.buffers = buffers;
+ }
+
+ @Override
+ public void completed(Integer result, Integer r) {
+ FileRange range = ranges.get(r);
+ ByteBuffer buffer = buffers[r];
+ if (result == -1) {
+ failed(new EOFException("Read past End of File"), r);
+ } else {
+ if (buffer.remaining() > 0) {
+ // issue a read for the rest of the buffer
+ // QQ: What if this fails? It has the same handler.
+ channel.read(buffer, range.getOffset() + buffer.position(), r, this);
+ } else {
+ // QQ: Why is this required? I think because we don't want the
+ // user to read data beyond limit.
+ buffer.flip();
+ range.getData().complete(buffer);
+ }
+ }
+ }
+
+ @Override
+ public void failed(Throwable exc, Integer r) {
+ LOG.debug("Failed while reading range {} ", r, exc);
+ ranges.get(r).getData().completeExceptionally(exc);
+ }
+ }
+
@Override
public FSDataInputStream open(Path f, int bufferSize) throws IOException {
getFileStatus(f);
@@ -304,9 +422,19 @@ final class LocalFSFileOutputStream extends OutputStream implements
STREAM_WRITE_EXCEPTIONS)
.build();
+ /**
+ * Thread level IOStatistics aggregator to update in close().
+ */
+ private final IOStatisticsAggregator
+ ioStatisticsAggregator;
+
private LocalFSFileOutputStream(Path f, boolean append,
FsPermission permission) throws IOException {
File file = pathToFile(f);
+ // store the aggregator before attempting any IO.
+ ioStatisticsAggregator =
+ IOStatisticsContext.getCurrentIOStatisticsContext().getAggregator();
+
if (!append && permission == null) {
permission = FsPermission.getFileDefault();
}
@@ -333,10 +461,17 @@ private LocalFSFileOutputStream(Path f, boolean append,
}
/*
- * Just forward to the fos
+ * Close the fos; update the IOStatisticsContext.
*/
@Override
- public void close() throws IOException { fos.close(); }
+ public void close() throws IOException {
+ try {
+ fos.close();
+ } finally {
+ ioStatisticsAggregator.aggregate(ioStatistics);
+ }
+ }
+
@Override
public void flush() throws IOException { fos.flush(); }
@Override
@@ -382,6 +517,7 @@ public boolean hasCapability(String capability) {
// new capabilities.
switch (capability.toLowerCase(Locale.ENGLISH)) {
case StreamCapabilities.IOSTATISTICS:
+ case StreamCapabilities.IOSTATISTICS_CONTEXT:
return true;
default:
return StoreImplementationUtils.isProbeForSyncable(capability);
@@ -1190,4 +1326,9 @@ public boolean hasPathCapability(final Path path, final String capability)
return super.hasPathCapability(path, capability);
}
}
+
+ @VisibleForTesting
+ static void setUseDeprecatedFileStatus(boolean useDeprecatedFileStatus) {
+ RawLocalFileSystem.useDeprecatedFileStatus = useDeprecatedFileStatus;
+ }
}
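
A sketch of how the thread-level IOStatistics aggregation added to these streams can be observed; it assumes IOStatisticsContext.getIOStatistics() and IOStatisticsLogging.ioStatisticsToPrettyString() are available, and the path is only an example.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FSDataOutputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.statistics.IOStatisticsContext;
    import org.apache.hadoop.fs.statistics.IOStatisticsLogging;

    public class ThreadIOStatisticsExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        FileSystem fs = FileSystem.getLocal(conf);
        Path path = new Path("/tmp/iostats-example.dat");   // hypothetical location
        try (FSDataOutputStream out = fs.create(path, true)) {
          out.write(new byte[4096]);
        }   // close() aggregates the write statistics into the thread's context
        try (FSDataInputStream in = fs.open(path)) {
          in.read(new byte[1024]);
        }   // close() aggregates the read statistics as well
        IOStatisticsContext ctx = IOStatisticsContext.getCurrentIOStatisticsContext();
        System.out.println(
            IOStatisticsLogging.ioStatisticsToPrettyString(ctx.getIOStatistics()));
      }
    }
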
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Seekable.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Seekable.java
index 919c857ffa628..f7546d58e6084 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Seekable.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Seekable.java
@@ -32,17 +32,27 @@ public interface Seekable {
* Seek to the given offset from the start of the file.
* The next read() will be from that location. Can't
* seek past the end of the file.
+ *
+ * @param pos offset from the start of the file.
+ * @throws IOException raised on errors performing I/O.
*/
void seek(long pos) throws IOException;
-
+
/**
* Return the current offset from the start of the file
+ *
+ * @return offset from the start of the file.
+ * @throws IOException raised on errors performing I/O.
*/
long getPos() throws IOException;
/**
- * Seeks a different copy of the data. Returns true if
+ * Seeks a different copy of the data. Returns true if
* found a new source, false otherwise.
+ *
+ * @param targetPos target position.
+ * @return true if found a new source, false otherwise.
+ * @throws IOException raised on errors performing I/O.
*/
@InterfaceAudience.Private
boolean seekToNewSource(long targetPos) throws IOException;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Stat.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Stat.java
index 07f0513290014..72a45309b17c9 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Stat.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Stat.java
@@ -73,8 +73,8 @@ public FileStatus getFileStatus() throws IOException {
}
/**
- * Whether Stat is supported on the current platform
- * @return
+ * Whether Stat is supported on the current platform.
+ * @return true if Stat is available on this platform; false otherwise.
*/
public static boolean isAvailable() {
if (Shell.LINUX || Shell.FREEBSD || Shell.MAC) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StorageStatistics.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StorageStatistics.java
index 2efe4566344ee..b4a86ab781280 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StorageStatistics.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StorageStatistics.java
@@ -127,6 +127,7 @@ public StorageStatistics(String name) {
/**
* Get the name of this StorageStatistics object.
+ * @return name of this StorageStatistics object
*/
public String getName() {
return name;
@@ -145,12 +146,15 @@ public String getScheme() {
*
* The values returned will depend on the type of FileSystem or FileContext
* object. The values do not necessarily reflect a snapshot in time.
+ *
+ * @return LongStatistic Iterator.
*/
public abstract Iterator<LongStatistic> getLongStatistics();
/**
* Get the value of a statistic.
*
+ * @param key key.
* @return null if the statistic is not being tracked or is not a
* long statistic. The value of the statistic, otherwise.
*/
@@ -159,6 +163,7 @@ public String getScheme() {
/**
* Return true if a statistic is being tracked.
*
+ * @param key key.
* @return True only if the statistic is being tracked.
*/
public abstract boolean isTracked(String key);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java
index 861178019505e..93ed57ef83057 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/StreamCapabilities.java
@@ -80,6 +80,12 @@ public interface StreamCapabilities {
*/
String IOSTATISTICS = "iostatistics";
+ /**
+ * Support for vectored IO api.
+ * See {@code PositionedReadable#readVectored(List, IntFunction)}.
+ */
+ String VECTOREDIO = "in:readvectored";
+
/**
* Stream abort() capability implemented by {@link Abortable#abort()}.
* This matches the Path Capability
@@ -87,6 +93,12 @@ public interface StreamCapabilities {
*/
String ABORTABLE_STREAM = CommonPathCapabilities.ABORTABLE_STREAM;
+ /**
+ * Streams that support IOStatistics context and capture thread-level
+ * IOStatistics.
+ */
+ String IOSTATISTICS_CONTEXT = "fs.capability.iocontext.supported";
+
/**
* Capabilities that a stream can support and be queried for.
*/
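
A small sketch of probing the new capabilities before relying on them; the path and filesystem are illustrative.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.StreamCapabilities;

    public class CapabilityProbeExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("file:///tmp/example.dat");   // hypothetical file
        FileSystem fs = path.getFileSystem(conf);
        try (FSDataInputStream in = fs.open(path)) {
          // hasCapability() lets callers degrade gracefully on older streams.
          boolean vectored = in.hasCapability(StreamCapabilities.VECTOREDIO);
          boolean threadStats = in.hasCapability(StreamCapabilities.IOSTATISTICS_CONTEXT);
          System.out.println("vectored IO: " + vectored
              + ", thread-level IOStatistics: " + threadStats);
        }
      }
    }
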
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Trash.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Trash.java
index e29cb9a4e0e33..5c5fa0237ea66 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Trash.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/Trash.java
@@ -23,8 +23,10 @@
import org.apache.hadoop.classification.InterfaceStability;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
+import org.apache.hadoop.fs.viewfs.ViewFileSystem;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
+import static org.apache.hadoop.fs.viewfs.Constants.*;
/**
* Provides a trash facility which supports pluggable Trash policies.
@@ -43,6 +45,7 @@ public class Trash extends Configured {
/**
* Construct a trash can accessor.
* @param conf a Configuration
+ * @throws IOException raised on errors performing I/O.
*/
public Trash(Configuration conf) throws IOException {
this(FileSystem.get(conf), conf);
@@ -52,6 +55,7 @@ public Trash(Configuration conf) throws IOException {
* Construct a trash can accessor for the FileSystem provided.
* @param fs the FileSystem
* @param conf a Configuration
+ * @throws IOException raised on errors performing I/O.
*/
public Trash(FileSystem fs, Configuration conf) throws IOException {
super(conf);
@@ -92,52 +96,100 @@ public static boolean moveToAppropriateTrash(FileSystem fs, Path p,
LOG.warn("Failed to get server trash configuration", e);
throw new IOException("Failed to get server trash configuration", e);
}
+
+ /*
+ * In HADOOP-18144, we changed getTrashRoot() in ViewFileSystem to return a
+ * viewFS path, instead of a targetFS path. moveToTrash works for
+ * ViewFileSystem now. ViewFileSystem will do path resolution internally by
+ * itself.
+ *
+ * When localized trash flag is enabled:
+ * 1). if fs is a ViewFileSystem, we can initialize Trash() with a
+ * ViewFileSystem object;
+ * 2). When fs is not a ViewFileSystem, the only place we would need to
+ * resolve a path is for symbolic links. However, symlink is not
+ * enabled in Hadoop due to the complexity to support it
+ * (HADOOP-10019).
+ */
+ if (conf.getBoolean(CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT,
+ CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT_DEFAULT)) {
+ Trash trash = new Trash(fs, conf);
+ return trash.moveToTrash(p);
+ }
+
Trash trash = new Trash(fullyResolvedFs, conf);
return trash.moveToTrash(fullyResolvedPath);
}
/**
- * Returns whether the trash is enabled for this filesystem
+ * Returns whether the trash is enabled for this filesystem.
+ *
+ * @return true if trash is enabled; false otherwise.
*/
public boolean isEnabled() {
return trashPolicy.isEnabled();
}
/** Move a file or directory to the current trash directory.
+ *
+ * @param path the path.
* @return false if the item is already in the trash or trash is disabled
+ * @throws IOException raised on errors performing I/O.
*/
public boolean moveToTrash(Path path) throws IOException {
return trashPolicy.moveToTrash(path);
}
- /** Create a trash checkpoint. */
+ /**
+ * Create a trash checkpoint.
+ * @throws IOException raised on errors performing I/O.
+ */
public void checkpoint() throws IOException {
trashPolicy.createCheckpoint();
}
- /** Delete old checkpoint(s). */
+ /**
+ * Delete old checkpoint(s).
+ * @throws IOException raised on errors performing I/O.
+ */
public void expunge() throws IOException {
trashPolicy.deleteCheckpoint();
}
- /** Delete all trash immediately. */
+ /**
+ * Delete all trash immediately.
+ * @throws IOException raised on errors performing I/O.
+ */
public void expungeImmediately() throws IOException {
trashPolicy.createCheckpoint();
trashPolicy.deleteCheckpointsImmediately();
}
- /** get the current working directory */
+ /**
+ * Get the current trash directory.
+ *
+ * @throws IOException raised on errors performing I/O.
+ * @return the current trash directory.
+ */
Path getCurrentTrashDir() throws IOException {
return trashPolicy.getCurrentTrashDir();
}
- /** get the configured trash policy */
+ /**
+ * get the configured trash policy.
+ *
+ * @return TrashPolicy.
+ */
TrashPolicy getTrashPolicy() {
return trashPolicy;
}
- /** Return a {@link Runnable} that periodically empties the trash of all
+ /**
+ * Return a {@link Runnable} that periodically empties the trash of all
* users, intended to be run by the superuser.
+ *
+ * @throws IOException raised on errors performing I/O.
+ * @return the emptier Runnable.
*/
public Runnable getEmptier() throws IOException {
return trashPolicy.getEmptier();
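
A sketch of the localized-trash behaviour described in the HADOOP-18144 comment above; the viewfs URI is hypothetical and the mount table is assumed to be configured elsewhere.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;
    import org.apache.hadoop.fs.Trash;

    import static org.apache.hadoop.fs.viewfs.Constants.CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT;

    public class ViewFsTrashExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        // Keep trash inside the mount point instead of the resolved target FS.
        conf.setBoolean(CONFIG_VIEWFS_TRASH_FORCE_INSIDE_MOUNT_POINT, true);
        Path p = new Path("viewfs://cluster/data/old-report.csv");   // hypothetical mount
        FileSystem fs = p.getFileSystem(conf);
        boolean moved = Trash.moveToAppropriateTrash(fs, p, conf);
        System.out.println("moved to trash: " + moved);
      }
    }
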
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicy.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicy.java
index 64fb81be99ee3..35e51f9e1cfb1 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicy.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicy.java
@@ -60,27 +60,34 @@ public void initialize(Configuration conf, FileSystem fs) {
/**
* Returns whether the Trash Policy is enabled for this filesystem.
+ *
+ * @return true if trash is enabled; false otherwise.
*/
public abstract boolean isEnabled();
/**
* Move a file or directory to the current trash directory.
+ * @param path the path.
* @return false if the item is already in the trash or trash is disabled
+ * @throws IOException raised on errors performing I/O.
*/
public abstract boolean moveToTrash(Path path) throws IOException;
/**
- * Create a trash checkpoint.
+ * Create a trash checkpoint.
+ * @throws IOException raised on errors performing I/O.
*/
public abstract void createCheckpoint() throws IOException;
/**
* Delete old trash checkpoint(s).
+ * @throws IOException raised on errors performing I/O.
*/
public abstract void deleteCheckpoint() throws IOException;
/**
* Delete all checkpoints immediately, ie empty trash.
+ * @throws IOException raised on errors performing I/O.
*/
public abstract void deleteCheckpointsImmediately() throws IOException;
@@ -94,6 +101,8 @@ public void initialize(Configuration conf, FileSystem fs) {
* TrashPolicy#getCurrentTrashDir(Path path).
* It returns the trash location correctly for the path specified no matter
* the path is in encryption zone or not.
+ *
+ * @return the path.
*/
public abstract Path getCurrentTrashDir();
@@ -102,7 +111,7 @@ public void initialize(Configuration conf, FileSystem fs) {
* Policy
* @param path path to be deleted
* @return current trash directory for the path to be deleted
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public Path getCurrentTrashDir(Path path) throws IOException {
throw new UnsupportedOperationException();
@@ -111,6 +120,9 @@ public Path getCurrentTrashDir(Path path) throws IOException {
/**
* Return a {@link Runnable} that periodically empties the trash of all
* users, intended to be run by the superuser.
+ *
+ * @throws IOException raised on errors performing I/O.
+ * @return Runnable.
*/
public abstract Runnable getEmptier() throws IOException;
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java
index 99467f5633625..f4228dea69f49 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/TrashPolicyDefault.java
@@ -191,8 +191,8 @@ public boolean moveToTrash(Path path) throws IOException {
cause = e;
}
}
- throw (IOException)
- new IOException("Failed to move to trash: " + path).initCause(cause);
+ throw new IOException("Failed to move " + path + " to trash " + trashPath,
+ cause);
}
@SuppressWarnings("deprecation")
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java
new file mode 100644
index 0000000000000..cf1b1ef969863
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/VectoredReadUtils.java
@@ -0,0 +1,329 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs;
+
+import java.io.EOFException;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Comparator;
+import java.util.List;
+import java.util.concurrent.CompletableFuture;
+import java.util.function.IntFunction;
+
+import org.apache.hadoop.fs.impl.CombinedFileRange;
+import org.apache.hadoop.util.Preconditions;
+import org.apache.hadoop.util.functional.Function4RaisingIOE;
+
+/**
+ * Utility class which implements helper methods used
+ * in vectored IO implementation.
+ */
+public final class VectoredReadUtils {
+
+ private static final int TMP_BUFFER_MAX_SIZE = 64 * 1024;
+
+ /**
+ * Validate a single range.
+ * @param range file range.
+ * @throws EOFException any EOF Exception.
+ */
+ public static void validateRangeRequest(FileRange range)
+ throws EOFException {
+
+ Preconditions.checkArgument(range.getLength() >= 0, "length is negative");
+ if (range.getOffset() < 0) {
+ throw new EOFException("position is negative");
+ }
+ }
+
+ /**
+ * Validate a list of vectored read ranges.
+ * @param ranges list of ranges.
+ * @throws EOFException any EOF exception.
+ */
+ public static void validateVectoredReadRanges(List<? extends FileRange> ranges)
+ throws EOFException {
+ for (FileRange range : ranges) {
+ validateRangeRequest(range);
+ }
+ }
+
+
+
+ /**
+ * This is the default implementation which iterates through the ranges
+ * to read each synchronously, but the intent is that subclasses
+ * can make more efficient readers.
+ * The data or exceptions are pushed into {@link FileRange#getData()}.
+ * @param stream the stream to read the data from
+ * @param ranges the byte ranges to read
+ * @param allocate the byte buffer allocation
+ */
+ public static void readVectored(PositionedReadable stream,
+ List<? extends FileRange> ranges,
+ IntFunction<ByteBuffer> allocate) {
+ for (FileRange range: ranges) {
+ range.setData(readRangeFrom(stream, range, allocate));
+ }
+ }
+
+ /**
+ * Synchronously reads a range from the stream dealing with the combinations
+ * of ByteBuffers buffers and PositionedReadable streams.
+ * @param stream the stream to read from
+ * @param range the range to read
+ * @param allocate the function to allocate ByteBuffers
+ * @return the CompletableFuture that contains the read data
+ */
+ public static CompletableFuture<ByteBuffer> readRangeFrom(PositionedReadable stream,
+ FileRange range,
+ IntFunction<ByteBuffer> allocate) {
+ CompletableFuture<ByteBuffer> result = new CompletableFuture<>();
+ try {
+ ByteBuffer buffer = allocate.apply(range.getLength());
+ if (stream instanceof ByteBufferPositionedReadable) {
+ ((ByteBufferPositionedReadable) stream).readFully(range.getOffset(),
+ buffer);
+ buffer.flip();
+ } else {
+ readNonByteBufferPositionedReadable(stream, range, buffer);
+ }
+ result.complete(buffer);
+ } catch (IOException ioe) {
+ result.completeExceptionally(ioe);
+ }
+ return result;
+ }
+
+ private static void readNonByteBufferPositionedReadable(PositionedReadable stream,
+ FileRange range,
+ ByteBuffer buffer) throws IOException {
+ if (buffer.isDirect()) {
+ readInDirectBuffer(range.getLength(),
+ buffer,
+ (position, buffer1, offset, length) -> {
+ stream.readFully(position, buffer1, offset, length);
+ return null;
+ });
+ buffer.flip();
+ } else {
+ stream.readFully(range.getOffset(), buffer.array(),
+ buffer.arrayOffset(), range.getLength());
+ }
+ }
+
+ /**
+ * Read bytes from stream into a byte buffer using an
+ * intermediate byte array.
+ * @param length number of bytes to read.
+ * @param buffer buffer to fill.
+ * @param operation operation to use for reading data.
+ * @throws IOException any IOE.
+ */
+ public static void readInDirectBuffer(int length,
+ ByteBuffer buffer,
+ Function4RaisingIOE<Integer, byte[], Integer, Integer, Void> operation) throws IOException {
+ if (length == 0) {
+ return;
+ }
+ int readBytes = 0;
+ int position = 0;
+ int tmpBufferMaxSize = Math.min(TMP_BUFFER_MAX_SIZE, length);
+ byte[] tmp = new byte[tmpBufferMaxSize];
+ while (readBytes < length) {
+ int currentLength = (readBytes + tmpBufferMaxSize) < length ?
+ tmpBufferMaxSize
+ : (length - readBytes);
+ operation.apply(position, tmp, 0, currentLength);
+ buffer.put(tmp, 0, currentLength);
+ position = position + currentLength;
+ readBytes = readBytes + currentLength;
+ }
+ }
+
+ /**
+ * Is the given input list:
+ * <ul>
+ *   <li>already sorted by offset</li>
+ *   <li>each range is more than minimumSeek apart</li>
+ *   <li>the start and end of each range is a multiple of chunkSize</li>
+ * </ul>
+ *
+ * @param input the list of input ranges.
+ * @param chunkSize the size of the chunks that the offset and end must align to.
+ * @param minimumSeek the minimum distance between ranges.
+ * @return true if we can use the input list as is.
+ */
+ public static boolean isOrderedDisjoint(List<? extends FileRange> input,
+ int chunkSize,
+ int minimumSeek) {
+ long previous = -minimumSeek;
+ for (FileRange range: input) {
+ long offset = range.getOffset();
+ long end = range.getOffset() + range.getLength();
+ if (offset % chunkSize != 0 ||
+ end % chunkSize != 0 ||
+ (offset - previous < minimumSeek)) {
+ return false;
+ }
+ previous = end;
+ }
+ return true;
+ }
+
+ /**
+ * Calculates floor value of offset based on chunk size.
+ * @param offset file offset.
+ * @param chunkSize file chunk size.
+ * @return floor value.
+ */
+ public static long roundDown(long offset, int chunkSize) {
+ if (chunkSize > 1) {
+ return offset - (offset % chunkSize);
+ } else {
+ return offset;
+ }
+ }
+
+ /**
+ * Calculates the ceil value of offset based on chunk size.
+ * @param offset file offset.
+ * @param chunkSize file chunk size.
+ * @return ceil value.
+ */
+ public static long roundUp(long offset, int chunkSize) {
+ if (chunkSize > 1) {
+ long next = offset + chunkSize - 1;
+ return next - (next % chunkSize);
+ } else {
+ return offset;
+ }
+ }
+
+ /**
+ * Check if the input ranges are overlapping in nature.
+ * We call two ranges to be overlapping when start offset
+ * of second is less than the end offset of first.
+ * End offset is calculated as start offset + length.
+ * @param input list of input ranges.
+ * @return the sorted ranges, if no overlap is found.
+ */
+ public static List<? extends FileRange> validateNonOverlappingAndReturnSortedRanges(
+ List<? extends FileRange> input) {
+
+ if (input.size() <= 1) {
+ return input;
+ }
+ FileRange[] sortedRanges = sortRanges(input);
+ FileRange prev = sortedRanges[0];
+ for (int i = 1; i < sortedRanges.length; i++) {
+ if (sortedRanges[i].getOffset() < prev.getOffset() + prev.getLength()) {
+ throw new UnsupportedOperationException("Overlapping ranges are not supported");
+ }
+ prev = sortedRanges[i];
+ }
+ return Arrays.asList(sortedRanges);
+ }
+
+ /**
+ * Sort the input ranges by offset.
+ * @param input input ranges.
+ * @return sorted ranges.
+ */
+ public static FileRange[] sortRanges(List<? extends FileRange> input) {
+ FileRange[] sortedRanges = input.toArray(new FileRange[0]);
+ Arrays.sort(sortedRanges, Comparator.comparingLong(FileRange::getOffset));
+ return sortedRanges;
+ }
+
+ /**
+ * Merge sorted ranges to optimize the access from the underlying file
+ * system.
+ * The motivations are that:
+ * <ul>
+ *   <li>Upper layers want to pass down logical file ranges.</li>
+ *   <li>Fewer reads have better performance.</li>
+ *   <li>Applications want callbacks as ranges are read.</li>
+ *   <li>Some file systems want to round ranges to be at checksum boundaries.</li>
+ * </ul>
+ *
+ * @param sortedRanges already sorted list of ranges based on offset.
+ * @param chunkSize round the start and end points to multiples of chunkSize
+ * @param minimumSeek the smallest gap that we should seek over in bytes
+ * @param maxSize the largest combined file range in bytes
+ * @return the list of sorted CombinedFileRanges that cover the input
+ */
+ public static List<CombinedFileRange> mergeSortedRanges(List<? extends FileRange> sortedRanges,
+ int chunkSize,
+ int minimumSeek,
+ int maxSize) {
+
+ CombinedFileRange current = null;
+ List<CombinedFileRange> result = new ArrayList<>(sortedRanges.size());
+
+ // now merge together the ones that merge
+ for (FileRange range: sortedRanges) {
+ long start = roundDown(range.getOffset(), chunkSize);
+ long end = roundUp(range.getOffset() + range.getLength(), chunkSize);
+ if (current == null || !current.merge(start, end, range, minimumSeek, maxSize)) {
+ current = new CombinedFileRange(start, end, range);
+ result.add(current);
+ }
+ }
+ return result;
+ }
+
+ /**
+ * Slice the data that was read to the user's request.
+ * This function assumes that the user's request is completely subsumed by the
+ * read data. This always creates a new buffer pointing to the same underlying
+ * data but with its own mark and position fields such that reading one buffer
+ * can't affect the other's mark and position.
+ * @param readData the buffer with the readData
+ * @param readOffset the offset in the file for the readData
+ * @param request the user's request
+ * @return the readData buffer that is sliced to the user's request
+ */
+ public static ByteBuffer sliceTo(ByteBuffer readData, long readOffset,
+ FileRange request) {
+ int offsetChange = (int) (request.getOffset() - readOffset);
+ int requestLength = request.getLength();
+ // Create a new buffer that is backed by the original contents
+ // The buffer will have position 0 and the same limit as the original one
+ readData = readData.slice();
+ // Change the offset and the limit of the buffer as the reader wants to see
+ // only relevant data
+ readData.position(offsetChange);
+ readData.limit(offsetChange + requestLength);
+ // Create a new buffer after the limit change so that only that portion of the data is
+ // returned to the reader.
+ readData = readData.slice();
+ return readData;
+ }
+
+ /**
+ * private constructor.
+ */
+ private VectoredReadUtils() {
+ throw new UnsupportedOperationException();
+ }
+}
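
A worked sketch of the range-coalescing helpers above; the values are chosen by hand and FileRange.createFileRange() is assumed to be the factory used elsewhere in this change.

    import java.nio.ByteBuffer;
    import java.util.Arrays;
    import java.util.List;

    import org.apache.hadoop.fs.FileRange;
    import org.apache.hadoop.fs.VectoredReadUtils;
    import org.apache.hadoop.fs.impl.CombinedFileRange;

    public class RangeCoalescingExample {
      public static void main(String[] args) {
        List<FileRange> ranges = Arrays.asList(
            FileRange.createFileRange(1_048_600, 100),   // assumed factory method
            FileRange.createFileRange(1_049_000, 100));
        // Alignment helpers: 1,048,600 rounds down to 1,048,576 for 4 KiB chunks,
        // and 1,049,100 rounds up to 1,052,672.
        long start = VectoredReadUtils.roundDown(1_048_600, 4096);
        long end = VectoredReadUtils.roundUp(1_049_100, 4096);
        // Merge ranges that are within 4 KiB of each other, capped at 1 MiB.
        List<CombinedFileRange> merged = VectoredReadUtils.mergeSortedRanges(
            Arrays.asList(VectoredReadUtils.sortRanges(ranges)),
            4096, 4096, 1024 * 1024);
        System.out.println("merged into " + merged.size() + " combined range(s)");
        // sliceTo() trims a buffer read for the combined range back to one request.
        ByteBuffer whole = ByteBuffer.allocate((int) (end - start));
        ByteBuffer firstSlice = VectoredReadUtils.sliceTo(whole, start, ranges.get(0));
        System.out.println("first slice holds " + firstSlice.remaining() + " bytes");
      }
    }
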
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/XAttrCodec.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/XAttrCodec.java
index 3d65275e673d6..df878d998706c 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/XAttrCodec.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/XAttrCodec.java
@@ -67,7 +67,7 @@ public enum XAttrCodec {
* the given string is treated as text.
* @param value string representation of the value.
* @return byte[] the value
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public static byte[] decodeValue(String value) throws IOException {
byte[] result = null;
@@ -102,9 +102,9 @@ public static byte[] decodeValue(String value) throws IOException {
* while strings encoded as hexadecimal and base64 are prefixed with
* 0x and 0s, respectively.
* @param value byte[] value
- * @param encoding
+ * @param encoding encoding.
* @return String string representation of value
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public static String encodeValue(byte[] value, XAttrCodec encoding)
throws IOException {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/AuditConstants.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/AuditConstants.java
index d9629e388b384..0929c2be03acf 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/AuditConstants.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/AuditConstants.java
@@ -90,6 +90,11 @@ private AuditConstants() {
*/
public static final String PARAM_PROCESS = "ps";
+ /**
+ * Task Attempt ID query header: {@value}.
+ */
+ public static final String PARAM_TASK_ATTEMPT_ID = "ta";
+
/**
* Thread 0: the thread which created a span {@value}.
*/
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/CommonAuditContext.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/CommonAuditContext.java
index 11681546e3d0a..2dcd4f8b3f570 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/CommonAuditContext.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/audit/CommonAuditContext.java
@@ -24,6 +24,9 @@
import java.util.concurrent.ConcurrentHashMap;
import java.util.function.Supplier;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
@@ -69,11 +72,16 @@
* {@link #currentAuditContext()} to get the thread-local
* context for the caller, which can then be manipulated.
*
+ * For further information, especially related to memory consumption,
+ * read the document `auditing_architecture` in the `hadoop-aws` module.
*/
@InterfaceAudience.Public
@InterfaceStability.Unstable
public final class CommonAuditContext {
+ private static final Logger LOG = LoggerFactory.getLogger(
+ CommonAuditContext.class);
+
/**
* Process ID; currently built from UUID and timestamp.
*/
@@ -92,7 +100,7 @@ public final class CommonAuditContext {
* Supplier operations must themselves be thread safe.
*/
private final Map<String, Supplier<String>> evaluatedEntries =
- new ConcurrentHashMap<>();
+ new ConcurrentHashMap<>(1);
static {
// process ID is fixed.
@@ -108,7 +116,7 @@ public final class CommonAuditContext {
* the span is finalized.
*/
private static final ThreadLocal<CommonAuditContext> ACTIVE_CONTEXT =
- ThreadLocal.withInitial(() -> createInstance());
+ ThreadLocal.withInitial(CommonAuditContext::createInstance);
private CommonAuditContext() {
}
@@ -116,20 +124,34 @@ private CommonAuditContext() {
/**
* Put a context entry.
* @param key key
- * @param value new value
+ * @param value new value. If null, triggers removal.
* @return old value or null
*/
public Supplier<String> put(String key, String value) {
- return evaluatedEntries.put(key, () -> value);
+ if (value != null) {
+ return evaluatedEntries.put(key, () -> value);
+ } else {
+ return evaluatedEntries.remove(key);
+ }
}
/**
* Put a context entry dynamically evaluated on demand.
+ * Important: as these supplier methods are long-lived,
+ * the supplier function MUST NOT be part of/refer to
+ * any object instance of significant memory size.
+ * Applications SHOULD remove references when they are
+ * no longer needed.
+ * When logged at TRACE, prints the key and stack trace of the caller,
+ * to allow for debugging of any problems.
* @param key key
* @param value new value
* @return old value or null
*/
public Supplier<String> put(String key, Supplier<String> value) {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Adding context entry {}", key, new Exception(key));
+ }
return evaluatedEntries.put(key, value);
}
@@ -138,6 +160,9 @@ public Supplier put(String key, Supplier value) {
* @param key key
*/
public void remove(String key) {
+ if (LOG.isTraceEnabled()) {
+ LOG.trace("Remove context entry {}", key);
+ }
evaluatedEntries.remove(key);
}
@@ -168,7 +193,7 @@ public void reset() {
private void init() {
// thread 1 is dynamic
- put(PARAM_THREAD1, () -> currentThreadID());
+ put(PARAM_THREAD1, CommonAuditContext::currentThreadID);
}
/**
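
A brief sketch of the updated put()/remove() semantics; the keys used are arbitrary examples, not standard audit parameters.

    import org.apache.hadoop.fs.audit.CommonAuditContext;

    public class AuditContextExample {
      public static void main(String[] args) {
        CommonAuditContext context = CommonAuditContext.currentAuditContext();
        // Static entry: stored as a constant-valued supplier.
        context.put("task", "example-task-001");
        // Dynamic entry: evaluated on demand. Keep the lambda free of references
        // to large objects, since it lives until it is removed.
        context.put("timestamp", () -> Long.toString(System.currentTimeMillis()));
        // Passing a null value now removes the entry instead of storing null.
        context.put("task", (String) null);
        context.remove("timestamp");
      }
    }
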
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractFSBuilderImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractFSBuilderImpl.java
index c69e7afe4e36e..4256522b2a372 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractFSBuilderImpl.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractFSBuilderImpl.java
@@ -46,7 +46,7 @@
*
* .opt("foofs:option.a", true)
* .opt("foofs:option.b", "value")
- * .opt("barfs:cache", true)
+ * .opt("fs.s3a.open.option.etag", "9fe4c37c25b")
* .must("foofs:cache", true)
* .must("barfs:cache-size", 256 * 1024 * 1024)
* .build();
@@ -88,6 +88,9 @@
/** Keep track of the keys for mandatory options. */
private final Set<String> mandatoryKeys = new HashSet<>();
+ /** Keep track of the optional keys. */
+ private final Set<String> optionalKeys = new HashSet<>();
+
/**
* Constructor with both optional path and path handle.
* Either or both argument may be empty, but it is an error for
@@ -163,6 +166,7 @@ public PathHandle getPathHandle() {
@Override
public B opt(@Nonnull final String key, @Nonnull final String value) {
mandatoryKeys.remove(key);
+ optionalKeys.add(key);
options.set(key, value);
return getThisBuilder();
}
@@ -175,6 +179,7 @@ public B opt(@Nonnull final String key, @Nonnull final String value) {
@Override
public B opt(@Nonnull final String key, boolean value) {
mandatoryKeys.remove(key);
+ optionalKeys.add(key);
options.setBoolean(key, value);
return getThisBuilder();
}
@@ -187,10 +192,19 @@ public B opt(@Nonnull final String key, boolean value) {
@Override
public B opt(@Nonnull final String key, int value) {
mandatoryKeys.remove(key);
+ optionalKeys.add(key);
options.setInt(key, value);
return getThisBuilder();
}
+ @Override
+ public B opt(@Nonnull final String key, final long value) {
+ mandatoryKeys.remove(key);
+ optionalKeys.add(key);
+ options.setLong(key, value);
+ return getThisBuilder();
+ }
+
/**
* Set optional float parameter for the Builder.
*
@@ -199,6 +213,7 @@ public B opt(@Nonnull final String key, int value) {
@Override
public B opt(@Nonnull final String key, float value) {
mandatoryKeys.remove(key);
+ optionalKeys.add(key);
options.setFloat(key, value);
return getThisBuilder();
}
@@ -211,6 +226,7 @@ public B opt(@Nonnull final String key, float value) {
@Override
public B opt(@Nonnull final String key, double value) {
mandatoryKeys.remove(key);
+ optionalKeys.add(key);
options.setDouble(key, value);
return getThisBuilder();
}
@@ -223,6 +239,7 @@ public B opt(@Nonnull final String key, double value) {
@Override
public B opt(@Nonnull final String key, @Nonnull final String... values) {
mandatoryKeys.remove(key);
+ optionalKeys.add(key);
options.setStrings(key, values);
return getThisBuilder();
}
@@ -248,6 +265,7 @@ public B must(@Nonnull final String key, @Nonnull final String value) {
@Override
public B must(@Nonnull final String key, boolean value) {
mandatoryKeys.add(key);
+ optionalKeys.remove(key);
options.setBoolean(key, value);
return getThisBuilder();
}
@@ -260,10 +278,19 @@ public B must(@Nonnull final String key, boolean value) {
@Override
public B must(@Nonnull final String key, int value) {
mandatoryKeys.add(key);
+ optionalKeys.remove(key);
options.setInt(key, value);
return getThisBuilder();
}
+ @Override
+ public B must(@Nonnull final String key, final long value) {
+ mandatoryKeys.add(key);
+ optionalKeys.remove(key);
+ options.setLong(key, value);
+ return getThisBuilder();
+ }
+
/**
* Set mandatory float option.
*
@@ -272,6 +299,7 @@ public B must(@Nonnull final String key, int value) {
@Override
public B must(@Nonnull final String key, float value) {
mandatoryKeys.add(key);
+ optionalKeys.remove(key);
options.setFloat(key, value);
return getThisBuilder();
}
@@ -284,6 +312,7 @@ public B must(@Nonnull final String key, float value) {
@Override
public B must(@Nonnull final String key, double value) {
mandatoryKeys.add(key);
+ optionalKeys.remove(key);
options.setDouble(key, value);
return getThisBuilder();
}
@@ -296,6 +325,7 @@ public B must(@Nonnull final String key, double value) {
@Override
public B must(@Nonnull final String key, @Nonnull final String... values) {
mandatoryKeys.add(key);
+ optionalKeys.remove(key);
options.setStrings(key, values);
return getThisBuilder();
}
@@ -310,10 +340,18 @@ public Configuration getOptions() {
/**
* Get all the keys that are set as mandatory keys.
+ * @return mandatory keys.
*/
public Set<String> getMandatoryKeys() {
return Collections.unmodifiableSet(mandatoryKeys);
}
+ /**
+ * Get all the keys that are set as optional keys.
+ * @return optional keys.
+ */
+ public Set<String> getOptionalKeys() {
+ return Collections.unmodifiableSet(optionalKeys);
+ }
/**
* Reject a configuration if one or more mandatory keys are
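
A sketch of the opt()/must() distinction the builder now tracks, using openFile() and the standard openfile option names; the must() call relies on the long overload added here and assumes the store accepts the standard length option.

    import org.apache.hadoop.conf.Configuration;
    import org.apache.hadoop.fs.FSDataInputStream;
    import org.apache.hadoop.fs.FileSystem;
    import org.apache.hadoop.fs.Path;

    import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_LENGTH;
    import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY;
    import static org.apache.hadoop.fs.Options.OpenFileOptions.FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL;

    public class BuilderOptMustExample {
      public static void main(String[] args) throws Exception {
        Configuration conf = new Configuration();
        Path path = new Path("file:///tmp/example.dat");   // hypothetical file
        FileSystem fs = path.getFileSystem(conf);
        try (FSDataInputStream in = fs.openFile(path)
            // opt(): an optional hint, recorded under the builder's optional keys.
            .opt(FS_OPTION_OPENFILE_READ_POLICY, FS_OPTION_OPENFILE_READ_POLICY_SEQUENTIAL)
            // must(): a mandatory key the filesystem has to recognise; uses the
            // new long overload to pass the known file length as a hint.
            .must(FS_OPTION_OPENFILE_LENGTH, 1024L)
            .build().get()) {
          in.read();
        }
      }
    }
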
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractMultipartUploader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractMultipartUploader.java
index 416924e18d87c..f9ae9f55cc17f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractMultipartUploader.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/AbstractMultipartUploader.java
@@ -127,7 +127,7 @@ protected void checkPutArguments(Path filePath,
* {@inheritDoc}.
* @param path path to abort uploads under.
* @return a future to -1.
- * @throws IOException
+ * @throws IOException raised on errors performing I/O.
*/
public CompletableFuture<Integer> abortUploadsUnderPath(Path path)
throws IOException {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/CombinedFileRange.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/CombinedFileRange.java
new file mode 100644
index 0000000000000..c9555a1e5414e
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/CombinedFileRange.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.impl;
+
+import org.apache.hadoop.fs.FileRange;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * A file range that represents a set of underlying file ranges.
+ * This is used when we combine the user's FileRange objects
+ * together into a single read for efficiency.
+ */
+public class CombinedFileRange extends FileRangeImpl {
+ private List<FileRange> underlying = new ArrayList<>();
+
+ public CombinedFileRange(long offset, long end, FileRange original) {
+ super(offset, (int) (end - offset), null);
+ this.underlying.add(original);
+ }
+
+ /**
+ * Get the list of ranges that were merged together to form this one.
+ * @return the list of input ranges
+ */
+ public List<FileRange> getUnderlying() {
+ return underlying;
+ }
+
+ /**
+ * Merge this input range into the current one, if it is compatible.
+ * It is assumed that otherOffset is greater than or equal to the current offset,
+ * which typically happens by sorting the input ranges on offset.
+ * @param otherOffset the offset to consider merging
+ * @param otherEnd the end to consider merging
+ * @param other the underlying FileRange to add if we merge
+ * @param minSeek the minimum distance that we'll seek without merging the
+ * ranges together
+ * @param maxSize the maximum size that we'll merge into a single range
+ * @return true if we have merged the range into this one
+ */
+ public boolean merge(long otherOffset, long otherEnd, FileRange other,
+ int minSeek, int maxSize) {
+ long end = this.getOffset() + this.getLength();
+ long newEnd = Math.max(end, otherEnd);
+ if (otherOffset - end >= minSeek || newEnd - this.getOffset() > maxSize) {
+ return false;
+ }
+ this.setLength((int) (newEnd - this.getOffset()));
+ underlying.add(other);
+ return true;
+ }
+}
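
The following sketch (an editorial illustration, not part of this patch) shows how a sorted set of user ranges could be coalesced through CombinedFileRange.merge(); the RangeCoalescer helper and the minSeek/maxSize values supplied by the caller are assumptions for demonstration only.

import java.util.ArrayList;
import java.util.Comparator;
import java.util.List;

import org.apache.hadoop.fs.FileRange;
import org.apache.hadoop.fs.impl.CombinedFileRange;

public final class RangeCoalescer {
  private RangeCoalescer() {
  }

  // Coalesce ranges sorted by offset: a range is folded into the current
  // combined range unless the gap reaches minSeek or the merged span would
  // exceed maxSize, in which case a new combined range is started.
  public static List<CombinedFileRange> coalesce(
      List<? extends FileRange> input, int minSeek, int maxSize) {
    List<FileRange> sorted = new ArrayList<>(input);
    sorted.sort(Comparator.comparingLong(FileRange::getOffset));
    List<CombinedFileRange> combined = new ArrayList<>();
    CombinedFileRange current = null;
    for (FileRange range : sorted) {
      long end = range.getOffset() + range.getLength();
      if (current == null
          || !current.merge(range.getOffset(), end, range, minSeek, maxSize)) {
        current = new CombinedFileRange(range.getOffset(), end, range);
        combined.add(current);
      }
    }
    return combined;
  }
}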
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileRangeImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileRangeImpl.java
new file mode 100644
index 0000000000000..1239be764ba5c
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileRangeImpl.java
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.fs.impl;
+
+import java.nio.ByteBuffer;
+import java.util.concurrent.CompletableFuture;
+
+import org.apache.hadoop.classification.InterfaceAudience;
+import org.apache.hadoop.fs.FileRange;
+
+/**
+ * A range of bytes from a file with an optional buffer to read those bytes
+ * for zero copy. This shouldn't be created directly via its constructor;
+ * rather, the factory defined in {@code FileRange#createFileRange} should be used.
+ */
+@InterfaceAudience.Private
+public class FileRangeImpl implements FileRange {
+ private long offset;
+ private int length;
+ private CompletableFuture<ByteBuffer> reader;
+
+ /**
+ * nullable reference to store in the range.
+ */
+ private final Object reference;
+
+ /**
+ * Create.
+ * @param offset offset in file
+ * @param length length of data to read.
+ * @param reference nullable reference to store in the range.
+ */
+ public FileRangeImpl(long offset, int length, Object reference) {
+ this.offset = offset;
+ this.length = length;
+ this.reference = reference;
+ }
+
+ @Override
+ public String toString() {
+ return "range[" + offset + "," + (offset + length) + ")";
+ }
+
+ @Override
+ public long getOffset() {
+ return offset;
+ }
+
+ @Override
+ public int getLength() {
+ return length;
+ }
+
+ public void setOffset(long offset) {
+ this.offset = offset;
+ }
+
+ public void setLength(int length) {
+ this.length = length;
+ }
+
+ @Override
+ public void setData(CompletableFuture<ByteBuffer> pReader) {
+ this.reader = pReader;
+ }
+
+ @Override
+ public CompletableFuture<ByteBuffer> getData() {
+ return reader;
+ }
+
+ @Override
+ public Object getReference() {
+ return reference;
+ }
+}
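
A hedged usage sketch of the range API introduced above; it assumes the FileRange.createFileRange() factory named in the class javadoc and a vectored read entry point (readVectored) on FSDataInputStream, neither of which is defined in this file, so treat the call sites as illustrative only.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Arrays;
import java.util.List;

import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileRange;

import static org.apache.hadoop.util.functional.FutureIO.awaitFuture;

public class VectoredReadSketch {
  public static void readTwoRanges(FSDataInputStream in) throws IOException {
    // describe the regions to read; offsets and lengths are arbitrary examples
    List<FileRange> ranges = Arrays.asList(
        FileRange.createFileRange(0, 4096),
        FileRange.createFileRange(1_000_000, 8192));
    // issue the read; buffers are allocated on demand by the stream
    in.readVectored(ranges, ByteBuffer::allocate);
    for (FileRange range : ranges) {
      ByteBuffer data = awaitFuture(range.getData());
      System.out.println(range + " -> " + data.remaining() + " bytes");
    }
  }
}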
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileSystemMultipartUploader.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileSystemMultipartUploader.java
index 2339e42128973..481d927672dc3 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileSystemMultipartUploader.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FileSystemMultipartUploader.java
@@ -51,6 +51,7 @@
import org.apache.hadoop.fs.PathHandle;
import org.apache.hadoop.fs.UploadHandle;
import org.apache.hadoop.fs.permission.FsPermission;
+import org.apache.hadoop.util.functional.FutureIO;
import static org.apache.hadoop.fs.Path.mergePaths;
import static org.apache.hadoop.io.IOUtils.cleanupWithLogger;
@@ -98,7 +99,7 @@ public FileSystemMultipartUploader(
public CompletableFuture<UploadHandle> startUpload(Path filePath)
throws IOException {
checkPath(filePath);
- return FutureIOSupport.eval(() -> {
+ return FutureIO.eval(() -> {
Path collectorPath = createCollectorPath(filePath);
fs.mkdirs(collectorPath, FsPermission.getDirDefault());
@@ -116,7 +117,7 @@ public CompletableFuture<PartHandle> putPart(UploadHandle uploadId,
throws IOException {
checkPutArguments(filePath, inputStream, partNumber, uploadId,
lengthInBytes);
- return FutureIOSupport.eval(() -> innerPutPart(filePath,
+ return FutureIO.eval(() -> innerPutPart(filePath,
inputStream, partNumber, uploadId, lengthInBytes));
}
@@ -179,7 +180,7 @@ public CompletableFuture<PathHandle> complete(
Map<Integer, PartHandle> handleMap) throws IOException {
checkPath(filePath);
- return FutureIOSupport.eval(() ->
+ return FutureIO.eval(() ->
innerComplete(uploadId, filePath, handleMap));
}
@@ -251,7 +252,7 @@ public CompletableFuture<Void> abort(UploadHandle uploadId,
Path collectorPath = new Path(new String(uploadIdByteArray, 0,
uploadIdByteArray.length, Charsets.UTF_8));
- return FutureIOSupport.eval(() -> {
+ return FutureIO.eval(() -> {
// force a check for a file existing; raises FNFE if not found
fs.getFileStatus(collectorPath);
fs.delete(collectorPath, true);
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureDataInputStreamBuilderImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureDataInputStreamBuilderImpl.java
index 24a8d49747fe6..833c21ec1a67f 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureDataInputStreamBuilderImpl.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureDataInputStreamBuilderImpl.java
@@ -19,6 +19,7 @@
package org.apache.hadoop.fs.impl;
import javax.annotation.Nonnull;
+import javax.annotation.Nullable;
import java.io.IOException;
import java.util.concurrent.CompletableFuture;
@@ -47,7 +48,7 @@
* options accordingly, for example:
*
* If the option is not related to the file system, the option will be ignored.
- * If the option is must, but not supported by the file system, a
+ * If the option is must, but not supported/known by the file system, an
* {@link IllegalArgumentException} will be thrown.
*
*/
@@ -125,6 +126,9 @@ protected int getBufferSize() {
/**
* Set the size of the buffer to be used.
+ *
+ * @param bufSize buffer size.
+ * @return FutureDataInputStreamBuilder.
*/
public FutureDataInputStreamBuilder bufferSize(int bufSize) {
bufferSize = bufSize;
@@ -136,6 +140,8 @@ public FutureDataInputStreamBuilder bufferSize(int bufSize) {
* This must be used after the constructor has been invoked to create
* the actual builder: it allows for subclasses to do things after
* construction.
+ *
+ * @return FutureDataInputStreamBuilder.
*/
public FutureDataInputStreamBuilder builder() {
return getThisBuilder();
@@ -147,8 +153,9 @@ public FutureDataInputStreamBuilder getThisBuilder() {
}
@Override
- public FutureDataInputStreamBuilder withFileStatus(FileStatus st) {
- this.status = requireNonNull(st, "status");
+ public FutureDataInputStreamBuilder withFileStatus(
+ @Nullable FileStatus st) {
+ this.status = st;
return this;
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureIOSupport.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureIOSupport.java
index 18f5187cb6134..0a080426c2b24 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureIOSupport.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/FutureIOSupport.java
@@ -20,7 +20,6 @@
import java.io.IOException;
import java.io.InterruptedIOException;
-import java.util.Map;
import java.util.concurrent.CompletableFuture;
import java.util.concurrent.CompletionException;
import java.util.concurrent.ExecutionException;
@@ -37,14 +36,16 @@
/**
* Support for future IO and the FS Builder subclasses.
- * If methods in here are needed for applications, promote
- * to {@link FutureIO} for public use -with the original
- * method relaying to it. This is to ensure that external
- * filesystem implementations can safely use these methods
+ * All methods in this class have been superseded by those in
+ * {@link FutureIO}.
+ * The methods here are retained but all marked as deprecated.
+ * This is to ensure that any external
+ * filesystem implementations can still use these methods
* without linkage problems surfacing.
*/
@InterfaceAudience.Private
@InterfaceStability.Unstable
+@Deprecated
public final class FutureIOSupport {
private FutureIOSupport() {
@@ -53,6 +54,7 @@ private FutureIOSupport() {
/**
* Given a future, evaluate it. Raised exceptions are
* extracted and handled.
+ * See {@link FutureIO#awaitFuture(Future, long, TimeUnit)}.
* @param future future to evaluate
* @param <T> type of the result.
* @return the result, if all went well.
@@ -60,7 +62,8 @@ private FutureIOSupport() {
* @throws IOException if something went wrong
* @throws RuntimeException any nested RTE thrown
*/
- public static <T> T awaitFuture(final Future<T> future)
+ @Deprecated
+ public static <T> T awaitFuture(final Future<T> future)
throws InterruptedIOException, IOException, RuntimeException {
return FutureIO.awaitFuture(future);
}
@@ -69,14 +72,18 @@ public static <T> T awaitFuture(final Future<T> future)
/**
* Given a future, evaluate it. Raised exceptions are
* extracted and handled.
+ * See {@link FutureIO#awaitFuture(Future, long, TimeUnit)}.
* @param future future to evaluate
* @param <T> type of the result.
+ * @param timeout timeout.
+ * @param unit unit.
* @return the result, if all went well.
* @throws InterruptedIOException future was interrupted
* @throws IOException if something went wrong
* @throws RuntimeException any nested RTE thrown
* @throws TimeoutException the future timed out.
*/
+ @Deprecated
public static <T> T awaitFuture(final Future<T> future,
final long timeout,
final TimeUnit unit)
@@ -88,10 +95,7 @@ public static <T> T awaitFuture(final Future<T> future,
/**
* From the inner cause of an execution exception, extract the inner cause
* if it is an IOE or RTE.
- * This will always raise an exception, either the inner IOException,
- * an inner RuntimeException, or a new IOException wrapping the raised
- * exception.
- *
+ * See {@link FutureIO#raiseInnerCause(ExecutionException)}.
* @param e exception.
* @param <T> type of return value.
* @return nothing, ever.
@@ -99,6 +103,7 @@ public static <T> T awaitFuture(final Future<T> future,
* any non-Runtime-Exception
* @throws RuntimeException if that is the inner cause.
*/
+ @Deprecated
public static <T> T raiseInnerCause(final ExecutionException e)
throws IOException {
return FutureIO.raiseInnerCause(e);
@@ -107,6 +112,7 @@ public static <T> T raiseInnerCause(final ExecutionException e)
/**
* Extract the cause of a completion failure and rethrow it if an IOE
* or RTE.
+ * See {@link FutureIO#raiseInnerCause(CompletionException)}.
* @param e exception.
* @param <T> type of return value.
* @return nothing, ever.
@@ -114,20 +120,15 @@ public static <T> T raiseInnerCause(final ExecutionException e)
* any non-Runtime-Exception
* @throws RuntimeException if that is the inner cause.
*/
+ @Deprecated
public static <T> T raiseInnerCause(final CompletionException e)
throws IOException {
return FutureIO.raiseInnerCause(e);
}
/**
- * Propagate options to any builder, converting everything with the
- * prefix to an option where, if there were 2+ dot-separated elements,
- * it is converted to a schema.
- *
+ * Propagate options to any builder.
+ * {@link FutureIO#propagateOptions(FSBuilder, Configuration, String, String)}
* @param builder builder to modify
* @param conf configuration to read
* @param optionalPrefix prefix for optional settings
@@ -136,56 +137,39 @@ public static <T> T raiseInnerCause(final CompletionException e)
* @param <T> type of builder
* @return the builder passed in.
*/
+ @Deprecated
public static <T, U extends FSBuilder<T, U>>
FSBuilder<T, U> propagateOptions(
final FSBuilder<T, U> builder,
final Configuration conf,
final String optionalPrefix,
final String mandatoryPrefix) {
- propagateOptions(builder, conf,
- optionalPrefix, false);
- propagateOptions(builder, conf,
- mandatoryPrefix, true);
- return builder;
+ return FutureIO.propagateOptions(builder,
+ conf, optionalPrefix, mandatoryPrefix);
}
/**
- * Propagate options to any builder, converting everything with the
- * prefix to an option where, if there were 2+ dot-separated elements,
- * it is converted to a schema.
- *
+ * Propagate options to any builder.
+ * {@link FutureIO#propagateOptions(FSBuilder, Configuration, String, boolean)}
* @param builder builder to modify
* @param conf configuration to read
* @param prefix prefix to scan/strip
* @param mandatory are the options to be mandatory or optional?
*/
+ @Deprecated
public static void propagateOptions(
final FSBuilder<?, ?> builder,
final Configuration conf,
final String prefix,
final boolean mandatory) {
-
- final String p = prefix.endsWith(".") ? prefix : (prefix + ".");
- final Map<String, String> propsWithPrefix = conf.getPropsWithPrefix(p);
- for (Map.Entry<String, String> entry : propsWithPrefix.entrySet()) {
- // change the schema off each entry
- String key = entry.getKey();
- String val = entry.getValue();
- if (mandatory) {
- builder.must(key, val);
- } else {
- builder.opt(key, val);
- }
- }
+ FutureIO.propagateOptions(builder, conf, prefix, mandatory);
}
/**
* Evaluate a CallableRaisingIOE in the current thread,
* converting IOEs to RTEs and propagating.
+ * See {@link FutureIO#eval(CallableRaisingIOE)}.
+ *
* @param callable callable to invoke
* @param <T> Return type.
* @return the evaluated result.
@@ -194,17 +178,6 @@ public static void propagateOptions(
*/
public static <T> CompletableFuture<T> eval(
CallableRaisingIOE<T> callable) {
- CompletableFuture<T> result = new CompletableFuture<>();
- try {
- result.complete(callable.apply());
- } catch (UnsupportedOperationException | IllegalArgumentException tx) {
- // fail fast here
- throw tx;
- } catch (Throwable tx) {
- // fail lazily here to ensure callers expect all File IO operations to
- // surface later
- result.completeExceptionally(tx);
- }
- return result;
+ return FutureIO.eval(callable);
}
}
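
The hunks above keep the old entry points but delegate to FutureIO; a minimal sketch of the equivalent change in calling code (illustrative only):

import java.io.IOException;
import java.util.concurrent.CompletableFuture;

import org.apache.hadoop.util.functional.FutureIO;

public class FutureIOMigrationSketch {
  public static String loadAsync() throws IOException {
    // FutureIO.eval() replaces FutureIOSupport.eval(): IllegalArgument/Unsupported
    // failures are thrown immediately, other exceptions surface when awaited.
    CompletableFuture<String> future = FutureIO.eval(() -> "result");
    // FutureIO.awaitFuture() replaces FutureIOSupport.awaitFuture().
    return FutureIO.awaitFuture(future);
  }
}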
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/MultipartUploaderBuilderImpl.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/MultipartUploaderBuilderImpl.java
index 5584e647849f5..665bcc6a95660 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/MultipartUploaderBuilderImpl.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/MultipartUploaderBuilderImpl.java
@@ -88,6 +88,9 @@ protected MultipartUploaderBuilderImpl(@Nonnull FileContext fc,
/**
* Constructor.
+ *
+ * @param fileSystem fileSystem.
+ * @param p path.
*/
protected MultipartUploaderBuilderImpl(@Nonnull FileSystem fileSystem,
@Nonnull Path p) {
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/OpenFileParameters.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/OpenFileParameters.java
index 77b4ff52696a3..a19c5faff4d90 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/OpenFileParameters.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/OpenFileParameters.java
@@ -38,6 +38,9 @@ public class OpenFileParameters {
*/
private Set<String> mandatoryKeys;
+ /** The optional keys. */
+ private Set<String> optionalKeys;
+
/**
* Options set during the build sequence.
*/
@@ -61,6 +64,11 @@ public OpenFileParameters withMandatoryKeys(final Set<String> keys) {
return this;
}
+ public OpenFileParameters withOptionalKeys(final Set<String> keys) {
+ this.optionalKeys = requireNonNull(keys);
+ return this;
+ }
+
public OpenFileParameters withOptions(final Configuration opts) {
this.options = requireNonNull(opts);
return this;
@@ -80,6 +88,10 @@ public Set<String> getMandatoryKeys() {
return mandatoryKeys;
}
+ public Set<String> getOptionalKeys() {
+ return optionalKeys;
+ }
+
public Configuration getOptions() {
return options;
}
@@ -91,4 +103,5 @@ public int getBufferSize() {
public FileStatus getStatus() {
return status;
}
+
}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WeakReferenceThreadMap.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WeakReferenceThreadMap.java
new file mode 100644
index 0000000000000..06be20310e43e
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WeakReferenceThreadMap.java
@@ -0,0 +1,92 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.fs.impl;
+
+import java.lang.ref.WeakReference;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import javax.annotation.Nullable;
+
+import org.apache.hadoop.util.WeakReferenceMap;
+
+import static java.util.Objects.requireNonNull;
+
+/**
+ * A WeakReferenceMap for threads.
+ * @param <V> value type of the map
+ */
+public class WeakReferenceThreadMap<V> extends WeakReferenceMap<Long, V> {
+
+ public WeakReferenceThreadMap(final Function<? super Long, ? extends V> factory,
+ @Nullable final Consumer<? super Long> referenceLost) {
+ super(factory, referenceLost);
+ }
+
+ /**
+ * Get the value for the current thread, creating if needed.
+ * @return an instance.
+ */
+ public V getForCurrentThread() {
+ return get(currentThreadId());
+ }
+
+ /**
+ * Remove the reference for the current thread.
+ * @return any reference value which existed.
+ */
+ public V removeForCurrentThread() {
+ return remove(currentThreadId());
+ }
+
+ /**
+ * Get the current thread ID.
+ * @return thread ID.
+ */
+ public long currentThreadId() {
+ return Thread.currentThread().getId();
+ }
+
+ /**
+ * Set the new value for the current thread.
+ * @param newVal new reference to set for the active thread.
+ * @return the previously set value, possibly null
+ */
+ public V setForCurrentThread(V newVal) {
+ requireNonNull(newVal);
+ long id = currentThreadId();
+
+ // if the same object is already in the map, just return it.
+ WeakReference<V> existingWeakRef = lookup(id);
+
+ // The looked up reference could be one of
+ // 1. null: nothing there
+ // 2. valid but get() == null : reference lost by GC.
+ // 3. different from the new value
+ // 4. the same as the old value
+ if (resolve(existingWeakRef) == newVal) {
+ // case 4: do nothing, return the new value
+ return newVal;
+ } else {
+ // cases 1, 2, 3: update the map and return the old value
+ return put(id, newVal);
+ }
+
+ }
+
+}
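
A brief illustrative use of the class above for per-thread state; the StringBuilder value type and the logging callback are arbitrary choices, not taken from this patch.

import org.apache.hadoop.fs.impl.WeakReferenceThreadMap;

public class PerThreadStateSketch {
  // one StringBuilder per thread, created on demand; entries may be reclaimed
  // by the GC once nothing else references them, triggering the callback
  private final WeakReferenceThreadMap<StringBuilder> perThread =
      new WeakReferenceThreadMap<>(
          id -> new StringBuilder(),
          id -> System.out.println("reference lost for thread " + id));

  public StringBuilder buffer() {
    return perThread.getForCurrentThread();
  }

  public void discard() {
    perThread.removeForCurrentThread();
  }
}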
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WrappedIOException.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WrappedIOException.java
index d2c999683c6c6..d5144b5e9c531 100644
--- a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WrappedIOException.java
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/WrappedIOException.java
@@ -20,20 +20,17 @@
import java.io.IOException;
import java.io.UncheckedIOException;
-import java.util.concurrent.ExecutionException;
import org.apache.hadoop.util.Preconditions;
import org.apache.hadoop.classification.InterfaceAudience;
import org.apache.hadoop.classification.InterfaceStability;
/**
- * A wrapper for an IOException which
- * {@link FutureIOSupport#raiseInnerCause(ExecutionException)} knows to
- * always extract the exception.
+ * A wrapper for an IOException.
*
* The constructor signature guarantees the cause will be an IOException,
* and as it checks for a null-argument, non-null.
- * @deprecated use the {@code UncheckedIOException}.
+ * @deprecated use the {@code UncheckedIOException} directly.
*/
@Deprecated
@InterfaceAudience.Private
@@ -51,8 +48,4 @@ public WrappedIOException(final IOException cause) {
super(Preconditions.checkNotNull(cause));
}
- @Override
- public synchronized IOException getCause() {
- return (IOException) super.getCause();
- }
}
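
Since the wrapper above is now deprecated in favour of java.io.UncheckedIOException, a minimal sketch of the replacement pattern (illustrative only):

import java.io.IOException;
import java.io.UncheckedIOException;

public class UncheckedIOSketch {
  // adapt an IO-throwing task to an interface that cannot throw checked exceptions
  public static Runnable asRunnable(IORunnable task) {
    return () -> {
      try {
        task.run();
      } catch (IOException e) {
        // getCause() on UncheckedIOException always returns the IOException
        throw new UncheckedIOException(e);
      }
    };
  }

  public interface IORunnable {
    void run() throws IOException;
  }
}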
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java
new file mode 100644
index 0000000000000..c18dc519188ba
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockCache.java
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+/**
+ * Provides functionality necessary for caching blocks of data read from FileSystem.
+ */
+public interface BlockCache extends Closeable {
+
+ /**
+ * Indicates whether the given block is in this cache.
+ *
+ * @param blockNumber the id of the given block.
+ * @return true if the given block is in this cache, false otherwise.
+ */
+ boolean containsBlock(int blockNumber);
+
+ /**
+ * Gets the blocks in this cache.
+ *
+ * @return the blocks in this cache.
+ */
+ Iterable<Integer> blocks();
+
+ /**
+ * Gets the number of blocks in this cache.
+ *
+ * @return the number of blocks in this cache.
+ */
+ int size();
+
+ /**
+ * Gets the block having the given {@code blockNumber}.
+ *
+ * @param blockNumber the id of the desired block.
+ * @param buffer contents of the desired block are copied to this buffer.
+ * @throws IOException if there is an error reading the given block.
+ */
+ void get(int blockNumber, ByteBuffer buffer) throws IOException;
+
+ /**
+ * Puts the given block in this cache.
+ *
+ * @param blockNumber the id of the given block.
+ * @param buffer contents of the given block to be added to this cache.
+ * @throws IOException if there is an error writing the given block.
+ */
+ void put(int blockNumber, ByteBuffer buffer) throws IOException;
+}
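
A toy, heap-backed implementation of the interface above, for illustration only; it is not the cache implementation used by this patch.

import java.io.IOException;
import java.nio.ByteBuffer;
import java.util.Map;
import java.util.concurrent.ConcurrentHashMap;

import org.apache.hadoop.fs.impl.prefetch.BlockCache;

public class InMemoryBlockCache implements BlockCache {
  // each cached block is kept as an independent byte[] copy
  private final Map<Integer, byte[]> blocks = new ConcurrentHashMap<>();

  @Override
  public boolean containsBlock(int blockNumber) {
    return blocks.containsKey(blockNumber);
  }

  @Override
  public Iterable<Integer> blocks() {
    return blocks.keySet();
  }

  @Override
  public int size() {
    return blocks.size();
  }

  @Override
  public void get(int blockNumber, ByteBuffer buffer) throws IOException {
    byte[] data = blocks.get(blockNumber);
    if (data == null) {
      throw new IOException("block not found: " + blockNumber);
    }
    buffer.put(data);
  }

  @Override
  public void put(int blockNumber, ByteBuffer buffer) {
    ByteBuffer copy = buffer.duplicate();
    copy.rewind();
    byte[] data = new byte[copy.remaining()];
    copy.get(data);
    blocks.put(blockNumber, data);
  }

  @Override
  public void close() {
    blocks.clear();
  }
}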
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockData.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockData.java
new file mode 100644
index 0000000000000..ecb8bc7243be0
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockData.java
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkPositiveInteger;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkWithinRange;
+
+/**
+ * Holds information about blocks of data in a file.
+ */
+public final class BlockData {
+
+ // State of each block of data.
+ enum State {
+
+ /** Data is not yet ready to be read from this block (still being prefetched). */
+ NOT_READY,
+
+ /** A read of this block has been enqueued in the prefetch queue. */
+ QUEUED,
+
+ /** This block's data is ready to be read (prefetching has completed). */
+ READY,
+
+ /** This block has been cached in the local disk cache. */
+ CACHED
+ }
+
+ /**
+ * State of all blocks in a file.
+ */
+ private State[] state;
+
+ /**
+ * The size of a file.
+ */
+ private final long fileSize;
+
+ /**
+ * The file is divided into blocks of this size.
+ */
+ private final int blockSize;
+
+ /**
+ * The number of blocks in the file.
+ */
+ private final int numBlocks;
+
+ /**
+ * Constructs an instance of {@link BlockData}.
+ * @param fileSize the size of a file.
+ * @param blockSize the file is divided into blocks of this size.
+ * @throws IllegalArgumentException if fileSize is negative.
+ * @throws IllegalArgumentException if blockSize is zero or negative.
+ */
+ public BlockData(long fileSize, int blockSize) {
+ checkNotNegative(fileSize, "fileSize");
+ if (fileSize == 0) {
+ checkNotNegative(blockSize, "blockSize");
+ } else {
+ checkPositiveInteger(blockSize, "blockSize");
+ }
+
+ this.fileSize = fileSize;
+ this.blockSize = blockSize;
+ this.numBlocks =
+ (fileSize == 0)
+ ? 0
+ : ((int) (fileSize / blockSize)) + (fileSize % blockSize > 0
+ ? 1
+ : 0);
+ this.state = new State[this.numBlocks];
+ for (int b = 0; b < this.numBlocks; b++) {
+ setState(b, State.NOT_READY);
+ }
+ }
+
+ /**
+ * Gets the size of each block.
+ * @return the size of each block.
+ */
+ public int getBlockSize() {
+ return blockSize;
+ }
+
+ /**
+ * Gets the size of the associated file.
+ * @return the size of the associated file.
+ */
+ public long getFileSize() {
+ return fileSize;
+ }
+
+ /**
+ * Gets the number of blocks in the associated file.
+ * @return the number of blocks in the associated file.
+ */
+ public int getNumBlocks() {
+ return numBlocks;
+ }
+
+ /**
+ * Indicates whether the given block is the last block in the associated file.
+ * @param blockNumber the id of the desired block.
+ * @return true if the given block is the last block in the associated file, false otherwise.
+ * @throws IllegalArgumentException if blockNumber is invalid.
+ */
+ public boolean isLastBlock(int blockNumber) {
+ if (fileSize == 0) {
+ return false;
+ }
+
+ throwIfInvalidBlockNumber(blockNumber);
+
+ return blockNumber == (numBlocks - 1);
+ }
+
+ /**
+ * Gets the id of the block that contains the given absolute offset.
+ * @param offset the absolute offset to check.
+ * @return the id of the block that contains the given absolute offset.
+ * @throws IllegalArgumentException if offset is invalid.
+ */
+ public int getBlockNumber(long offset) {
+ throwIfInvalidOffset(offset);
+
+ return (int) (offset / blockSize);
+ }
+
+ /**
+ * Gets the size of the given block.
+ * @param blockNumber the id of the desired block.
+ * @return the size of the given block.
+ */
+ public int getSize(int blockNumber) {
+ if (fileSize == 0) {
+ return 0;
+ }
+
+ if (isLastBlock(blockNumber)) {
+ return (int) (fileSize - (((long) blockSize) * (numBlocks - 1)));
+ } else {
+ return blockSize;
+ }
+ }
+
+ /**
+ * Indicates whether the given absolute offset is valid.
+ * @param offset absolute offset in the file.
+ * @return true if the given absolute offset is valid, false otherwise.
+ */
+ public boolean isValidOffset(long offset) {
+ return (offset >= 0) && (offset < fileSize);
+ }
+
+ /**
+ * Gets the start offset of the given block.
+ * @param blockNumber the id of the given block.
+ * @return the start offset of the given block.
+ * @throws IllegalArgumentException if blockNumber is invalid.
+ */
+ public long getStartOffset(int blockNumber) {
+ throwIfInvalidBlockNumber(blockNumber);
+
+ return blockNumber * (long) blockSize;
+ }
+
+ /**
+ * Gets the relative offset corresponding to the given block and the absolute offset.
+ * @param blockNumber the id of the given block.
+ * @param offset absolute offset in the file.
+ * @return the relative offset corresponding to the given block and the absolute offset.
+ * @throws IllegalArgumentException if either blockNumber or offset is invalid.
+ */
+ public int getRelativeOffset(int blockNumber, long offset) {
+ throwIfInvalidOffset(offset);
+
+ return (int) (offset - getStartOffset(blockNumber));
+ }
+
+ /**
+ * Gets the state of the given block.
+ * @param blockNumber the id of the given block.
+ * @return the state of the given block.
+ * @throws IllegalArgumentException if blockNumber is invalid.
+ */
+ public State getState(int blockNumber) {
+ throwIfInvalidBlockNumber(blockNumber);
+
+ return state[blockNumber];
+ }
+
+ /**
+ * Sets the state of the given block to the given value.
+ * @param blockNumber the id of the given block.
+ * @param blockState the target state.
+ * @throws IllegalArgumentException if blockNumber is invalid.
+ */
+ public void setState(int blockNumber, State blockState) {
+ throwIfInvalidBlockNumber(blockNumber);
+
+ state[blockNumber] = blockState;
+ }
+
+ // Debug helper.
+ public String getStateString() {
+ StringBuilder sb = new StringBuilder();
+ int blockNumber = 0;
+ while (blockNumber < numBlocks) {
+ State tstate = getState(blockNumber);
+ int endBlockNumber = blockNumber;
+ while ((endBlockNumber < numBlocks) && (getState(endBlockNumber)
+ == tstate)) {
+ endBlockNumber++;
+ }
+ sb.append(
+ String.format("[%03d ~ %03d] %s%n", blockNumber, endBlockNumber - 1,
+ tstate));
+ blockNumber = endBlockNumber;
+ }
+ return sb.toString();
+ }
+
+ private void throwIfInvalidBlockNumber(int blockNumber) {
+ checkWithinRange(blockNumber, "blockNumber", 0, numBlocks - 1);
+ }
+
+ private void throwIfInvalidOffset(long offset) {
+ checkWithinRange(offset, "offset", 0, fileSize - 1);
+ }
+}
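
A short worked example of the block arithmetic above, assuming a hypothetical 250 byte file split into 100 byte blocks:

import org.apache.hadoop.fs.impl.prefetch.BlockData;

public class BlockDataArithmetic {
  public static void main(String[] args) {
    // 250 bytes / 100 byte blocks -> 3 blocks of sizes 100, 100 and 50
    BlockData blockData = new BlockData(250, 100);
    System.out.println(blockData.getNumBlocks());           // 3
    System.out.println(blockData.getSize(2));                // 50 (partial last block)
    System.out.println(blockData.getBlockNumber(150));       // 1
    System.out.println(blockData.getStartOffset(2));         // 200
    System.out.println(blockData.getRelativeOffset(1, 150)); // 50
    System.out.println(blockData.isLastBlock(2));            // true
  }
}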
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockManager.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockManager.java
new file mode 100644
index 0000000000000..45f0aabe7dcd9
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockManager.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.io.Closeable;
+import java.io.IOException;
+import java.nio.ByteBuffer;
+
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull;
+
+/**
+ * Provides read access to the underlying file one block at a time.
+ *
+ * This class is the simplest form of a {@code BlockManager} that does
+ * not perform prefetching or caching.
+ */
+public abstract class BlockManager implements Closeable {
+
+ /**
+ * Information about each block of the underlying file.
+ */
+ private final BlockData blockData;
+
+ /**
+ * Constructs an instance of {@code BlockManager}.
+ *
+ * @param blockData information about each block of the underlying file.
+ *
+ * @throws IllegalArgumentException if blockData is null.
+ */
+ public BlockManager(BlockData blockData) {
+ checkNotNull(blockData, "blockData");
+
+ this.blockData = blockData;
+ }
+
+ /**
+ * Gets block data information.
+ *
+ * @return instance of {@code BlockData}.
+ */
+ public BlockData getBlockData() {
+ return blockData;
+ }
+
+ /**
+ * Gets the block having the given {@code blockNumber}.
+ *
+ * The entire block is read into memory and returned as a {@code BufferData}.
+ * The blocks are treated as a limited resource and must be released when
+ * one is done reading them.
+ *
+ * @param blockNumber the number of the block to be read and returned.
+ * @return {@code BufferData} having data from the given block.
+ *
+ * @throws IOException if there is an error reading the given block.
+ * @throws IllegalArgumentException if blockNumber is negative.
+ */
+ public BufferData get(int blockNumber) throws IOException {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ int size = blockData.getSize(blockNumber);
+ ByteBuffer buffer = ByteBuffer.allocate(size);
+ long startOffset = blockData.getStartOffset(blockNumber);
+ read(buffer, startOffset, size);
+ buffer.flip();
+ return new BufferData(blockNumber, buffer);
+ }
+
+ /**
+ * Reads into the given {@code buffer} {@code size} bytes from the underlying file
+ * starting at {@code startOffset}.
+ *
+ * @param buffer the buffer to read data into.
+ * @param startOffset the offset at which reading starts.
+ * @param size the number of bytes to read.
+ * @return number of bytes read.
+ * @throws IOException if there is an error reading the given block.
+ */
+ public abstract int read(ByteBuffer buffer, long startOffset, int size) throws IOException;
+
+ /**
+ * Releases resources allocated to the given block.
+ *
+ * @param data the {@code BufferData} to release.
+ *
+ * @throws IllegalArgumentException if data is null.
+ */
+ public void release(BufferData data) {
+ checkNotNull(data, "data");
+
+ // Do nothing because we allocate a new buffer each time.
+ }
+
+ /**
+ * Requests optional prefetching of the given block.
+ *
+ * @param blockNumber the id of the block to prefetch.
+ *
+ * @throws IllegalArgumentException if blockNumber is negative.
+ */
+ public void requestPrefetch(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ // Do nothing because we do not support prefetches.
+ }
+
+ /**
+ * Requests cancellation of any previously issued prefetch requests.
+ */
+ public void cancelPrefetches() {
+ // Do nothing because we do not support prefetches.
+ }
+
+ /**
+ * Requests that the given block should be copied to the cache. Optional operation.
+ *
+ * @param data the {@code BufferData} instance to optionally cache.
+ */
+ public void requestCaching(BufferData data) {
+ // Do nothing because we do not support caching.
+ }
+
+ @Override
+ public void close() {
+ }
+}
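
A minimal illustrative subclass of the class above whose "file" is an in-memory byte array; real implementations read from a FileSystem stream, so this is only a sketch of what read() must do.

import java.nio.ByteBuffer;

import org.apache.hadoop.fs.impl.prefetch.BlockData;
import org.apache.hadoop.fs.impl.prefetch.BlockManager;

public class ByteArrayBlockManager extends BlockManager {
  private final byte[] data;

  public ByteArrayBlockManager(byte[] data, int blockSize) {
    super(new BlockData(data.length, blockSize));
    this.data = data;
  }

  @Override
  public int read(ByteBuffer buffer, long startOffset, int size) {
    // copy the requested region of the "file" into the caller's buffer
    buffer.put(data, (int) startOffset, size);
    return size;
  }
}

Callers obtain a BufferData via get(blockNumber) and hand it back through release() once they are done with it.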
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockOperations.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockOperations.java
new file mode 100644
index 0000000000000..2744334a3bd7a
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BlockOperations.java
@@ -0,0 +1,425 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.DoubleSummaryStatistics;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative;
+
+/**
+ * Block level operations performed on a file.
+ * This class is meant to be used by {@code BlockManager}.
+ * It is separated out in its own file due to its size.
+ *
+ * This class is used for debugging/logging. Calls to this class
+ * can be safely removed without affecting the overall operation.
+ */
+public final class BlockOperations {
+ private static final Logger LOG = LoggerFactory.getLogger(BlockOperations.class);
+
+ /**
+ * Operation kind.
+ */
+ public enum Kind {
+ UNKNOWN("??", "unknown", false),
+ CANCEL_PREFETCHES("CP", "cancelPrefetches", false),
+ CLOSE("CX", "close", false),
+ CACHE_PUT("C+", "putC", true),
+ GET_CACHED("GC", "getCached", true),
+ GET_PREFETCHED("GP", "getPrefetched", true),
+ GET_READ("GR", "getRead", true),
+ PREFETCH("PF", "prefetch", true),
+ RELEASE("RL", "release", true),
+ REQUEST_CACHING("RC", "requestCaching", true),
+ REQUEST_PREFETCH("RP", "requestPrefetch", true);
+
+ private String shortName;
+ private String name;
+ private boolean hasBlock;
+
+ Kind(String shortName, String name, boolean hasBlock) {
+ this.shortName = shortName;
+ this.name = name;
+ this.hasBlock = hasBlock;
+ }
+
+ private static Map<String, Kind> shortNameToKind = new HashMap<>();
+
+ public static Kind fromShortName(String shortName) {
+ if (shortNameToKind.isEmpty()) {
+ for (Kind kind : Kind.values()) {
+ shortNameToKind.put(kind.shortName, kind);
+ }
+ }
+ return shortNameToKind.get(shortName);
+ }
+ }
+
+ public static class Operation {
+ private final Kind kind;
+ private final int blockNumber;
+ private final long timestamp;
+
+ public Operation(Kind kind, int blockNumber) {
+ this.kind = kind;
+ this.blockNumber = blockNumber;
+ this.timestamp = System.nanoTime();
+ }
+
+ public Kind getKind() {
+ return kind;
+ }
+
+ public int getBlockNumber() {
+ return blockNumber;
+ }
+
+ public long getTimestamp() {
+ return timestamp;
+ }
+
+ public void getSummary(StringBuilder sb) {
+ if (kind.hasBlock) {
+ sb.append(String.format("%s(%d)", kind.shortName, blockNumber));
+ } else {
+ sb.append(String.format("%s", kind.shortName));
+ }
+ }
+
+ public String getDebugInfo() {
+ if (kind.hasBlock) {
+ return String.format("--- %s(%d)", kind.name, blockNumber);
+ } else {
+ return String.format("... %s()", kind.name);
+ }
+ }
+ }
+
+ public static class End extends Operation {
+ private Operation op;
+
+ public End(Operation op) {
+ super(op.kind, op.blockNumber);
+ this.op = op;
+ }
+
+ @Override
+ public void getSummary(StringBuilder sb) {
+ sb.append("E");
+ super.getSummary(sb);
+ }
+
+ @Override
+ public String getDebugInfo() {
+ return "***" + super.getDebugInfo().substring(3);
+ }
+
+ public double duration() {
+ return (getTimestamp() - op.getTimestamp()) / 1e9;
+ }
+ }
+
+ private ArrayList<Operation> ops;
+ private boolean debugMode;
+
+ public BlockOperations() {
+ this.ops = new ArrayList<>();
+ }
+
+ public synchronized void setDebug(boolean state) {
+ debugMode = state;
+ }
+
+ private synchronized Operation add(Operation op) {
+ if (debugMode) {
+ LOG.info(op.getDebugInfo());
+ }
+ ops.add(op);
+ return op;
+ }
+
+ public Operation getPrefetched(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.GET_PREFETCHED, blockNumber));
+ }
+
+ public Operation getCached(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.GET_CACHED, blockNumber));
+ }
+
+ public Operation getRead(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.GET_READ, blockNumber));
+ }
+
+ public Operation release(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.RELEASE, blockNumber));
+ }
+
+ public Operation requestPrefetch(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.REQUEST_PREFETCH, blockNumber));
+ }
+
+ public Operation prefetch(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.PREFETCH, blockNumber));
+ }
+
+ public Operation cancelPrefetches() {
+ return add(new Operation(Kind.CANCEL_PREFETCHES, -1));
+ }
+
+ public Operation close() {
+ return add(new Operation(Kind.CLOSE, -1));
+ }
+
+ public Operation requestCaching(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.REQUEST_CACHING, blockNumber));
+ }
+
+ public Operation addToCache(int blockNumber) {
+ checkNotNegative(blockNumber, "blockNumber");
+
+ return add(new Operation(Kind.CACHE_PUT, blockNumber));
+ }
+
+ public Operation end(Operation op) {
+ return add(new End(op));
+ }
+
+ private static void append(StringBuilder sb, String format, Object... args) {
+ sb.append(String.format(format, args));
+ }
+
+ public synchronized String getSummary(boolean showDebugInfo) {
+ StringBuilder sb = new StringBuilder();
+ for (Operation op : ops) {
+ if (op != null) {
+ if (showDebugInfo) {
+ sb.append(op.getDebugInfo());
+ sb.append("\n");
+ } else {
+ op.getSummary(sb);
+ sb.append(";");
+ }
+ }
+ }
+
+ sb.append("\n");
+ getDurationInfo(sb);
+
+ return sb.toString();
+ }
+
+ public synchronized void getDurationInfo(StringBuilder sb) {
+ Map<Kind, DoubleSummaryStatistics> durations = new HashMap<>();
+ for (Operation op : ops) {
+ if (op instanceof End) {
+ End endOp = (End) op;
+ DoubleSummaryStatistics stats = durations.get(endOp.getKind());
+ if (stats == null) {
+ stats = new DoubleSummaryStatistics();
+ durations.put(endOp.getKind(), stats);
+ }
+ stats.accept(endOp.duration());
+ }
+ }
+
+ List<Kind> kinds = Arrays.asList(
+ Kind.GET_CACHED,
+ Kind.GET_PREFETCHED,
+ Kind.GET_READ,
+ Kind.CACHE_PUT,
+ Kind.PREFETCH,
+ Kind.REQUEST_CACHING,
+ Kind.REQUEST_PREFETCH,
+ Kind.CANCEL_PREFETCHES,
+ Kind.RELEASE,
+ Kind.CLOSE
+ );
+
+ for (Kind kind : kinds) {
+ append(sb, "%-18s : ", kind);
+ DoubleSummaryStatistics stats = durations.get(kind);
+ if (stats == null) {
+ append(sb, "--\n");
+ } else {
+ append(
+ sb,
+ "#ops = %3d, total = %5.1f, min: %3.1f, avg: %3.1f, max: %3.1f\n",
+ stats.getCount(),
+ stats.getSum(),
+ stats.getMin(),
+ stats.getAverage(),
+ stats.getMax());
+ }
+ }
+ }
+
+ public synchronized void analyze(StringBuilder sb) {
+ Map<Integer, List<Operation>> blockOps = new HashMap<>();
+
+ // Group-by block number.
+ for (Operation op : ops) {
+ if (op.blockNumber < 0) {
+ continue;
+ }
+
+ List<Operation> perBlockOps;
+ if (!blockOps.containsKey(op.blockNumber)) {
+ perBlockOps = new ArrayList<>();
+ blockOps.put(op.blockNumber, perBlockOps);
+ }
+
+ perBlockOps = blockOps.get(op.blockNumber);
+ perBlockOps.add(op);
+ }
+
+ List<Integer> prefetchedNotUsed = new ArrayList<>();
+ List<Integer> cachedNotUsed = new ArrayList<>();
+
+ for (Map.Entry<Integer, List<Operation>> entry : blockOps.entrySet()) {
+ Integer blockNumber = entry.getKey();
+ List<Operation> perBlockOps = entry.getValue();
+ Map<Kind, Integer> kindCounts = new HashMap<>();
+ Map<Kind, Integer> endKindCounts = new HashMap<>();
+
+ for (Operation op : perBlockOps) {
+ if (op instanceof End) {
+ int endCount = endKindCounts.getOrDefault(op.kind, 0) + 1;
+ endKindCounts.put(op.kind, endCount);
+ } else {
+ int count = kindCounts.getOrDefault(op.kind, 0) + 1;
+ kindCounts.put(op.kind, count);
+ }
+ }
+
+ for (Kind kind : kindCounts.keySet()) {
+ int count = kindCounts.getOrDefault(kind, 0);
+ int endCount = endKindCounts.getOrDefault(kind, 0);
+ if (count != endCount) {
+ append(sb, "[%d] %s : #ops(%d) != #end-ops(%d)\n", blockNumber, kind, count, endCount);
+ }
+
+ if (count > 1) {
+ append(sb, "[%d] %s = %d\n", blockNumber, kind, count);
+ }
+ }
+
+ int prefetchCount = kindCounts.getOrDefault(Kind.PREFETCH, 0);
+ int getPrefetchedCount = kindCounts.getOrDefault(Kind.GET_PREFETCHED, 0);
+ if ((prefetchCount > 0) && (getPrefetchedCount < prefetchCount)) {
+ prefetchedNotUsed.add(blockNumber);
+ }
+
+ int cacheCount = kindCounts.getOrDefault(Kind.CACHE_PUT, 0);
+ int getCachedCount = kindCounts.getOrDefault(Kind.GET_CACHED, 0);
+ if ((cacheCount > 0) && (getCachedCount < cacheCount)) {
+ cachedNotUsed.add(blockNumber);
+ }
+ }
+
+ if (!prefetchedNotUsed.isEmpty()) {
+ append(sb, "Prefetched but not used: %s\n", getIntList(prefetchedNotUsed));
+ }
+
+ if (!cachedNotUsed.isEmpty()) {
+ append(sb, "Cached but not used: %s\n", getIntList(cachedNotUsed));
+ }
+ }
+
+ private static String getIntList(Iterable<Integer> nums) {
+ List<String> numList = new ArrayList<>();
+ for (Integer n : nums) {
+ numList.add(n.toString());
+ }
+ return String.join(", ", numList);
+ }
+
+ public static BlockOperations fromSummary(String summary) {
+ BlockOperations ops = new BlockOperations();
+ ops.setDebug(true);
+ Pattern blockOpPattern = Pattern.compile("([A-Z+]+)(\\(([0-9]+)?\\))?");
+ String[] tokens = summary.split(";");
+ for (String token : tokens) {
+ Matcher matcher = blockOpPattern.matcher(token);
+ if (!matcher.matches()) {
+ String message = String.format("Unknown summary format: %s", token);
+ throw new IllegalArgumentException(message);
+ }
+
+ String shortName = matcher.group(1);
+ String blockNumberStr = matcher.group(3);
+ int blockNumber = (blockNumberStr == null) ? -1 : Integer.parseInt(blockNumberStr);
+ Kind kind = Kind.fromShortName(shortName);
+ Kind endKind = null;
+ if (kind == null) {
+ if (shortName.charAt(0) == 'E') {
+ endKind = Kind.fromShortName(shortName.substring(1));
+ }
+ }
+
+ if (kind == null && endKind == null) {
+ String message = String.format("Unknown short name: %s (token = %s)", shortName, token);
+ throw new IllegalArgumentException(message);
+ }
+
+ if (kind != null) {
+ ops.add(new Operation(kind, blockNumber));
+ } else {
+ Operation op = null;
+ for (int i = ops.ops.size() - 1; i >= 0; i--) {
+ op = ops.ops.get(i);
+ if ((op.blockNumber == blockNumber) && (op.kind == endKind) && !(op instanceof End)) {
+ ops.add(new End(op));
+ break;
+ }
+ }
+
+ if (op == null) {
+ LOG.warn("Start op not found: {}({})", endKind, blockNumber);
+ }
+ }
+ }
+
+ return ops;
+ }
+}
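
An illustrative sketch of recording operations and of the short-name summary format parsed by fromSummary(); the operation sequence is invented for the example.

import org.apache.hadoop.fs.impl.prefetch.BlockOperations;

public class BlockOperationsSketch {
  public static void main(String[] args) {
    BlockOperations ops = new BlockOperations();
    BlockOperations.Operation prefetch = ops.prefetch(0);
    ops.end(prefetch);
    BlockOperations.Operation read = ops.getPrefetched(0);
    ops.end(read);

    // prints something like "PF(0);EPF(0);GP(0);EGP(0);" plus per-kind durations
    System.out.println(ops.getSummary(false));

    // the short-name token stream can be parsed back for offline analysis
    BlockOperations parsed = BlockOperations.fromSummary("PF(0);EPF(0);GP(0);EGP(0);");
    StringBuilder sb = new StringBuilder();
    parsed.analyze(sb);
    System.out.println(sb);
  }
}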
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BoundedResourcePool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BoundedResourcePool.java
new file mode 100644
index 0000000000000..a871f8237729f
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BoundedResourcePool.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.util.Collections;
+import java.util.IdentityHashMap;
+import java.util.Set;
+import java.util.concurrent.ArrayBlockingQueue;
+
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNull;
+
+/**
+ * Manages a fixed pool of resources.
+ *
+ * Avoids creating a new resource if a previously created instance is already available.
+ */
+public abstract class BoundedResourcePool<T> extends ResourcePool<T> {
+ /**
+ * The size of this pool. Fixed at creation time.
+ */
+ private final int size;
+
+ /**
+ * Items currently available in the pool.
+ */
+ private ArrayBlockingQueue<T> items;
+
+ /**
+ * Items that have been created so far (regardless of whether they are currently available).
+ */
+ private Set<T> createdItems;
+
+ /**
+ * Constructs a resource pool of the given size.
+ *
+ * @param size the size of this pool. Cannot be changed post creation.
+ *
+ * @throws IllegalArgumentException if size is zero or negative.
+ */
+ public BoundedResourcePool(int size) {
+ Validate.checkPositiveInteger(size, "size");
+
+ this.size = size;
+ this.items = new ArrayBlockingQueue<>(size);
+
+ // The created items are identified based on their object reference.
+ this.createdItems = Collections.newSetFromMap(new IdentityHashMap<T, Boolean>());
+ }
+
+ /**
+ * Acquires a resource, blocking if necessary until one becomes available.
+ */
+ @Override
+ public T acquire() {
+ return this.acquireHelper(true);
+ }
+
+ /**
+ * Acquires a resource if one is immediately available; otherwise returns null.
+ */
+ @Override
+ public T tryAcquire() {
+ return this.acquireHelper(false);
+ }
+
+ /**
+ * Releases a previously acquired resource.
+ *
+ * @throws IllegalArgumentException if item is null.
+ */
+ @Override
+ public void release(T item) {
+ checkNotNull(item, "item");
+
+ synchronized (createdItems) {
+ if (!createdItems.contains(item)) {
+ throw new IllegalArgumentException("This item is not a part of this pool");
+ }
+ }
+
+ // Return if this item was released earlier.
+ // We cannot use items.contains() because that check is not based on reference equality.
+ for (T entry : items) {
+ if (entry == item) {
+ return;
+ }
+ }
+
+ try {
+ items.put(item);
+ } catch (InterruptedException e) {
+ throw new IllegalStateException("release() should never block", e);
+ }
+ }
+
+ @Override
+ public synchronized void close() {
+ for (T item : createdItems) {
+ close(item);
+ }
+
+ items.clear();
+ items = null;
+
+ createdItems.clear();
+ createdItems = null;
+ }
+
+ /**
+ * Derived classes may implement a way to cleanup each item.
+ */
+ @Override
+ protected synchronized void close(T item) {
+ // Do nothing in this class. Allow overriding classes to take any cleanup action.
+ }
+
+ /**
+ * Number of items created so far. Mostly for testing purposes.
+ * @return the count.
+ */
+ public int numCreated() {
+ synchronized (createdItems) {
+ return createdItems.size();
+ }
+ }
+
+ /**
+ * Number of items available to be acquired. Mostly for testing purposes.
+ * @return the number available.
+ */
+ public synchronized int numAvailable() {
+ return (size - numCreated()) + items.size();
+ }
+
+ // For debugging purposes.
+ @Override
+ public synchronized String toString() {
+ return String.format(
+ "size = %d, #created = %d, #in-queue = %d, #available = %d",
+ size, numCreated(), items.size(), numAvailable());
+ }
+
+ /**
+ * Derived classes must implement a way to create an instance of a resource.
+ */
+ protected abstract T createNew();
+
+ private T acquireHelper(boolean canBlock) {
+
+ // Prefer reusing an item if one is available.
+ // That avoids unnecessarily creating new instances.
+ T result = items.poll();
+ if (result != null) {
+ return result;
+ }
+
+ synchronized (createdItems) {
+ // Create a new instance if allowed by the capacity of this pool.
+ if (createdItems.size() < size) {
+ T item = createNew();
+ createdItems.add(item);
+ return item;
+ }
+ }
+
+ if (canBlock) {
+ try {
+ // Block for an instance to be available.
+ return items.take();
+ } catch (InterruptedException e) {
+ Thread.currentThread().interrupt();
+ return null;
+ }
+ } else {
+ return null;
+ }
+ }
+}
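
A minimal illustrative subclass of the pool above that hands out direct ByteBuffers; the pool size and buffer capacity are arbitrary. acquire() blocks until a buffer is free, tryAcquire() returns null instead, and release() returns a buffer to the pool.

import java.nio.ByteBuffer;

import org.apache.hadoop.fs.impl.prefetch.BoundedResourcePool;

public class ByteBufferPool extends BoundedResourcePool<ByteBuffer> {
  private final int bufferSize;

  public ByteBufferPool(int size, int bufferSize) {
    super(size);
    this.bufferSize = bufferSize;
  }

  @Override
  protected ByteBuffer createNew() {
    // only ever called while fewer than `size` buffers exist
    return ByteBuffer.allocateDirect(bufferSize);
  }
}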
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferData.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferData.java
new file mode 100644
index 0000000000000..de68269ab700c
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferData.java
@@ -0,0 +1,319 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.concurrent.Future;
+import java.util.zip.CRC32;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Holds the state of a ByteBuffer that is in use by {@code CachingBlockManager}.
+ *
+ * This class is not meant to be of general use. It exists in its own file due to its size.
+ * We use the term block and buffer interchangeably in this file because one buffer
+ * holds exactly one block of data.
+ *
+ * Holding all of the state associated with a block allows us to validate and control
+ * state transitions in a synchronized fashion.
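+ *
+ * An illustrative lifecycle (a sketch only; {@code prefetchFuture} and {@code cachingFuture}
+ * stand for futures supplied by the caller's executor):
+ * <pre>
+ *   BufferData data = new BufferData(blockNumber, buffer); // BLANK
+ *   data.setPrefetch(prefetchFuture);                      // PREFETCHING
+ *   data.setReady(BufferData.State.PREFETCHING);           // READY
+ *   data.setCaching(cachingFuture);                        // CACHING
+ *   data.setDone();                                        // DONE
+ * </pre>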
+ */
+public final class BufferData {
+
+ private static final Logger LOG = LoggerFactory.getLogger(BufferData.class);
+
+ public enum State {
+ /**
+ * Unknown / invalid state.
+ */
+ UNKNOWN,
+
+ /**
+ * Buffer has been acquired but has no data.
+ */
+ BLANK,
+
+ /**
+ * This block is being prefetched.
+ */
+ PREFETCHING,
+
+ /**
+ * This block is being added to the local cache.
+ */
+ CACHING,
+
+ /**
+ * This block has data and is ready to be read.
+ */
+ READY,
+
+ /**
+ * This block is no longer in use and should not be used once in this state.
+ */
+ DONE
+ }
+
+ /**
+ * Number of the block associated with this buffer.
+ */
+ private final int blockNumber;
+
+ /**
+ * The buffer associated with this block.
+ */
+ private ByteBuffer buffer;
+
+ /**
+ * Current state of this block.
+ */
+ private volatile State state;
+
+ /**
+ * Future of the action being performed on this block (e.g., prefetching or caching).
+ */
+ private Future<Void> action;
+
+ /**
+ * Checksum of the buffer contents once in READY state.
+ */
+ private long checksum = 0;
+
+ /**
+ * Constructs an instance of this class.
+ *
+ * @param blockNumber Number of the block associated with this buffer.
+ * @param buffer The buffer associated with this block.
+ *
+ * @throws IllegalArgumentException if blockNumber is negative.
+ * @throws IllegalArgumentException if buffer is null.
+ */
+ public BufferData(int blockNumber, ByteBuffer buffer) {
+ Validate.checkNotNegative(blockNumber, "blockNumber");
+ Validate.checkNotNull(buffer, "buffer");
+
+ this.blockNumber = blockNumber;
+ this.buffer = buffer;
+ this.state = State.BLANK;
+ }
+
+ /**
+ * Gets the id of this block.
+ *
+ * @return the id of this block.
+ */
+ public int getBlockNumber() {
+ return this.blockNumber;
+ }
+
+ /**
+ * Gets the buffer associated with this block.
+ *
+ * @return the buffer associated with this block.
+ */
+ public ByteBuffer getBuffer() {
+ return this.buffer;
+ }
+
+ /**
+ * Gets the state of this block.
+ *
+ * @return the state of this block.
+ */
+ public State getState() {
+ return this.state;
+ }
+
+ /**
+ * Gets the checksum of data in this block.
+ *
+ * @return the checksum of data in this block.
+ */
+ public long getChecksum() {
+ return this.checksum;
+ }
+
+ /**
+ * Computes CRC32 checksum of the given buffer's contents.
+ *
+ * @param buffer the buffer whose content's checksum is to be computed.
+ * @return the computed checksum.
+ */
+ public static long getChecksum(ByteBuffer buffer) {
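+ // Work on a duplicate so the original buffer's position and limit are left untouched.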
+ ByteBuffer tempBuffer = buffer.duplicate();
+ tempBuffer.rewind();
+ CRC32 crc32 = new CRC32();
+ crc32.update(tempBuffer);
+ return crc32.getValue();
+ }
+
+ public synchronized Future<Void> getActionFuture() {
+ return this.action;
+ }
+
+ /**
+ * Indicates that a prefetch operation is in progress.
+ *
+ * @param actionFuture the {@code Future} of a prefetch action.
+ *
+ * @throws IllegalArgumentException if actionFuture is null.
+ */
+ public synchronized void setPrefetch(Future<Void> actionFuture) {
+ Validate.checkNotNull(actionFuture, "actionFuture");
+
+ this.updateState(State.PREFETCHING, State.BLANK);
+ this.action = actionFuture;
+ }
+
+ /**
+ * Indicates that a caching operation is in progress.
+ *
+ * @param actionFuture the {@code Future} of a caching action.
+ *
+ * @throws IllegalArgumentException if actionFuture is null.
+ */
+ public synchronized void setCaching(Future<Void> actionFuture) {
+ Validate.checkNotNull(actionFuture, "actionFuture");
+
+ this.throwIfStateIncorrect(State.PREFETCHING, State.READY);
+ this.state = State.CACHING;
+ this.action = actionFuture;
+ }
+
+ /**
+ * Marks the completion of reading data into the buffer.
+ * The buffer cannot be modified once in this state.
+ *
+ * @param expectedCurrentState the collection of states from which transition to READY is allowed.
+ */
+ public synchronized void setReady(State... expectedCurrentState) {
+ if (this.checksum != 0) {
+ throw new IllegalStateException("Checksum cannot be changed once set");
+ }
+
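+ // Replace the buffer with a read-only view so its contents cannot be modified through this object.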
+ this.buffer = this.buffer.asReadOnlyBuffer();
+ this.checksum = getChecksum(this.buffer);
+ this.buffer.rewind();
+ this.updateState(State.READY, expectedCurrentState);
+ }
+
+ /**
+ * Indicates that this block is no longer of use and can be reclaimed.
+ */
+ public synchronized void setDone() {
+ if (this.checksum != 0) {
+ if (getChecksum(this.buffer) != this.checksum) {
+ throw new IllegalStateException("checksum changed after setReady()");
+ }
+ }
+ this.state = State.DONE;
+ this.action = null;
+ }
+
+ /**
+ * Updates the current state to the specified value.
+ * Asserts that the current state is as expected.
+ * @param newState the state to transition to.
+ * @param expectedCurrentState the collection of states from which
+ * transition to {@code newState} is allowed.
+ *
+ * @throws IllegalArgumentException if newState is null.
+ * @throws IllegalArgumentException if expectedCurrentState is null.
+ */
+ public synchronized void updateState(State newState,
+ State... expectedCurrentState) {
+ Validate.checkNotNull(newState, "newState");
+ Validate.checkNotNull(expectedCurrentState, "expectedCurrentState");
+
+ this.throwIfStateIncorrect(expectedCurrentState);
+ this.state = newState;
+ }
+
+ /**
+ * Helper that asserts the current state is one of the expected values.
+ *
+ * @param states the collection of allowed states.
+ *
+ * @throws IllegalArgumentException if states is null.
+ * @throws IllegalStateException if the current state is not one of the expected states.
+ */
+ public void throwIfStateIncorrect(State... states) {
+ Validate.checkNotNull(states, "states");
+
+ if (this.stateEqualsOneOf(states)) {
+ return;
+ }
+
+ List<String> statesStr = new ArrayList<>();
+ for (State s : states) {
+ statesStr.add(s.toString());
+ }
+
+ String message = String.format(
+ "Expected buffer state to be '%s' but found: %s",
+ String.join(" or ", statesStr), this);
+ throw new IllegalStateException(message);
+ }
+
+ public boolean stateEqualsOneOf(State... states) {
+ State currentState = this.state;
+
+ for (State s : states) {
+ if (currentState == s) {
+ return true;
+ }
+ }
+
+ return false;
+ }
+
+ @Override
+ public String toString() {
+
+ return String.format(
+ "[%03d] id: %03d, %s: buf: %s, checksum: %d, future: %s",
+ this.blockNumber,
+ System.identityHashCode(this),
+ this.state,
+ this.getBufferStr(this.buffer),
+ this.checksum,
+ this.getFutureStr(this.action));
+ }
+
+ private String getFutureStr(Future<Void> f) {
+ if (f == null) {
+ return "--";
+ } else {
+ return f.isDone() ? "done" : "not done";
+ }
+ }
+
+ private String getBufferStr(ByteBuffer buf) {
+ if (buf == null) {
+ return "--";
+ } else {
+ return String.format(
+ "(id = %d, pos = %d, lim = %d)",
+ System.identityHashCode(buf),
+ buf.position(), buf.limit());
+ }
+ }
+}
diff --git a/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferPool.java b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferPool.java
new file mode 100644
index 0000000000000..189357f6bd04f
--- /dev/null
+++ b/hadoop-common-project/hadoop-common/src/main/java/org/apache/hadoop/fs/impl/prefetch/BufferPool.java
@@ -0,0 +1,323 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.hadoop.fs.impl.prefetch;
+
+import java.io.Closeable;
+import java.nio.ByteBuffer;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.IdentityHashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.concurrent.Future;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import static java.util.Objects.requireNonNull;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkNotNegative;
+import static org.apache.hadoop.fs.impl.prefetch.Validate.checkState;
+import static org.apache.hadoop.util.Preconditions.checkArgument;
+import static org.apache.hadoop.util.Preconditions.checkNotNull;
+
+/**
+ * Manages a fixed pool of {@code ByteBuffer} instances.
+ *
+ * Avoids creating a new buffer if a previously created buffer is already available.
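+ *
+ * A usage sketch (assuming the {@code acquire}/{@code release} methods defined later in this
+ * class; error handling omitted):
+ * <pre>
+ *   BufferPool pool = new BufferPool(poolSize, bufferSize, statistics);
+ *   BufferData data = pool.acquire(blockNumber);
+ *   // fill and read data.getBuffer() ...
+ *   pool.release(data);
+ *   pool.close();
+ * </pre>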
+ */
+public class BufferPool implements Closeable {
+
+ private static final Logger LOG = LoggerFactory.getLogger(BufferPool.class);
+
+ /**
+ * Max number of buffers in this pool.
+ */
+ private final int size;
+
+ /**
+ * Size in bytes of each buffer.
+ */
+ private final int bufferSize;
+
+ /*
+ Invariants for internal state.
+ -- a buffer is either in this.pool or in this.allocated
+ -- transition between this.pool <==> this.allocated must be atomic
+ -- only one buffer allocated for a given blockNumber
+ */
+
+ /**
+ * Underlying bounded resource pool.
+ */
+ private BoundedResourcePool<ByteBuffer> pool;
+
+ /**
+ * Allows associating metadata with each buffer in the pool.
+ */
+ private Map<BufferData, ByteBuffer> allocated;
+
+ /**
+ * Prefetching stats.
+ */
+ private PrefetchingStatistics prefetchingStatistics;
+
+ /**
+ * Initializes a new instance of the {@code BufferPool} class.
+ * @param size number of buffers in this pool.
+ * @param bufferSize size in bytes of each buffer.
+ * @param prefetchingStatistics statistics for this stream.
+ * @throws IllegalArgumentException if size is zero or negative.
+ * @throws IllegalArgumentException if bufferSize is zero or negative.
+ */
+ public BufferPool(int size,
+ int bufferSize,
+ PrefetchingStatistics prefetchingStatistics) {
+ Validate.checkPositiveInteger(size, "size");
+ Validate.checkPositiveInteger(bufferSize, "bufferSize");
+
+ this.size = size;
+ this.bufferSize = bufferSize;
+ this.allocated = new IdentityHashMap