
Commit 177d906

HADOOP-17770. WASB: Support disabling buffered reads in positional reads (#3149)
1 parent c81f82e commit 177d906

File tree: 6 files changed, +200 -11 lines changed

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/AzureNativeFileSystemStore.java

Lines changed: 26 additions & 5 deletions
@@ -41,6 +41,7 @@
 import java.util.Locale;
 import java.util.Map;
 import java.util.Map.Entry;
+import java.util.Optional;
 import java.util.Set;
 
 import org.apache.commons.lang3.StringUtils;
@@ -241,6 +242,16 @@ public class AzureNativeFileSystemStore implements NativeFileSystemStore {
    */
   public static final String KEY_ENABLE_FLAT_LISTING = "fs.azure.flatlist.enable";
 
+  /**
+   * Optional config to enable a lock-free pread which will bypass the buffer
+   * in BlockBlobInputStream.
+   * This is not a config which can be set at cluster level. It can be used as
+   * an option on FutureDataInputStreamBuilder.
+   * @see FileSystem#openFile(org.apache.hadoop.fs.Path)
+   */
+  public static final String FS_AZURE_BLOCK_BLOB_BUFFERED_PREAD_DISABLE =
+      "fs.azure.block.blob.buffered.pread.disable";
+
   /**
    * The set of directories where we should apply atomic folder rename
    * synchronized with createNonRecursive.
@@ -1591,18 +1602,22 @@ private OutputStream openOutputStream(final CloudBlobWrapper blob)
    * Opens a new input stream for the given blob (page or block blob)
    * to read its data.
    */
-  private InputStream openInputStream(CloudBlobWrapper blob)
-      throws StorageException, IOException {
+  private InputStream openInputStream(CloudBlobWrapper blob,
+      Optional<Configuration> options) throws StorageException, IOException {
     if (blob instanceof CloudBlockBlobWrapper) {
       LOG.debug("Using stream seek algorithm {}", inputStreamVersion);
       switch(inputStreamVersion) {
       case 1:
         return blob.openInputStream(getDownloadOptions(),
             getInstrumentedContext(isConcurrentOOBAppendAllowed()));
       case 2:
+        boolean bufferedPreadDisabled = options.map(c -> c
+            .getBoolean(FS_AZURE_BLOCK_BLOB_BUFFERED_PREAD_DISABLE, false))
+            .orElse(false);
         return new BlockBlobInputStream((CloudBlockBlobWrapper) blob,
             getDownloadOptions(),
-            getInstrumentedContext(isConcurrentOOBAppendAllowed()));
+            getInstrumentedContext(isConcurrentOOBAppendAllowed()),
+            bufferedPreadDisabled);
       default:
         throw new IOException("Unknown seek algorithm: " + inputStreamVersion);
       }
@@ -2290,6 +2305,12 @@ public InputStream retrieve(String key) throws AzureException, IOException {
   @Override
   public InputStream retrieve(String key, long startByteOffset)
       throws AzureException, IOException {
+    return retrieve(key, startByteOffset, Optional.empty());
+  }
+
+  @Override
+  public InputStream retrieve(String key, long startByteOffset,
+      Optional<Configuration> options) throws AzureException, IOException {
     try {
       // Check if a session exists, if not create a session with the
       // Azure storage server.
@@ -2301,7 +2322,7 @@ public InputStream retrieve(String key, long startByteOffset)
       }
       checkContainer(ContainerAccessType.PureRead);
 
-      InputStream inputStream = openInputStream(getBlobReference(key));
+      InputStream inputStream = openInputStream(getBlobReference(key), options);
       if (startByteOffset > 0) {
         // Skip bytes and ignore return value. This is okay
         // because if you try to skip too far you will be positioned
@@ -2852,7 +2873,7 @@ public void rename(String srcKey, String dstKey, boolean acquireLease,
     OutputStream opStream = null;
     try {
       if (srcBlob.getProperties().getBlobType() == BlobType.PAGE_BLOB){
-        ipStream = openInputStream(srcBlob);
+        ipStream = openInputStream(srcBlob, Optional.empty());
         opStream = openOutputStream(dstBlob);
         byte[] buffer = new byte[PageBlobFormatHelpers.PAGE_SIZE];
         int len;
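
The `case 2:` branch above derives the per-stream flag from the optional openFile() options. A minimal standalone sketch of that lookup pattern (the class and method names here are illustrative, not part of the commit):

```java
import java.util.Optional;
import org.apache.hadoop.conf.Configuration;

public class OptionLookupSketch {
  static final String KEY = "fs.azure.block.blob.buffered.pread.disable";

  // Mirrors the options.map(...).orElse(false) chain above: the key's
  // default comes from Configuration#getBoolean, and Optional#orElse
  // covers calls where no per-open options were supplied at all.
  static boolean bufferedPreadDisabled(Optional<Configuration> options) {
    return options.map(c -> c.getBoolean(KEY, false)).orElse(false);
  }

  public static void main(String[] args) {
    Configuration opts = new Configuration(false);
    opts.setBoolean(KEY, true);
    System.out.println(bufferedPreadDisabled(Optional.empty()));  // false
    System.out.println(bufferedPreadDisabled(Optional.of(opts))); // true
  }
}
```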

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/BlockBlobInputStream.java

Lines changed: 40 additions & 5 deletions
@@ -28,18 +28,19 @@
 import com.microsoft.azure.storage.blob.BlobRequestOptions;
 
 import org.apache.hadoop.fs.FSExceptionMessages;
-import org.apache.hadoop.fs.Seekable;
+import org.apache.hadoop.fs.FSInputStream;
 import org.apache.hadoop.fs.azure.StorageInterface.CloudBlockBlobWrapper;
 
 /**
  * Encapsulates the BlobInputStream used by block blobs and adds support for
  * random access and seek. Random access performance is improved by several
  * orders of magnitude.
  */
-final class BlockBlobInputStream extends InputStream implements Seekable {
+final class BlockBlobInputStream extends FSInputStream {
   private final CloudBlockBlobWrapper blob;
   private final BlobRequestOptions options;
   private final OperationContext opContext;
+  private final boolean bufferedPreadDisabled;
   private InputStream blobInputStream = null;
   private int minimumReadSizeInBytes = 0;
   private long streamPositionAfterLastRead = -1;
@@ -62,12 +63,13 @@ final class BlockBlobInputStream extends InputStream implements Seekable {
    * @param opContext the blob operation context.
    * @throws IOException IO failure
    */
-  BlockBlobInputStream(CloudBlockBlobWrapper blob,
-      BlobRequestOptions options,
-      OperationContext opContext) throws IOException {
+  BlockBlobInputStream(CloudBlockBlobWrapper blob, BlobRequestOptions options,
+      OperationContext opContext, boolean bufferedPreadDisabled)
+      throws IOException {
    this.blob = blob;
    this.options = options;
    this.opContext = opContext;
+   this.bufferedPreadDisabled = bufferedPreadDisabled;
 
    this.minimumReadSizeInBytes = blob.getStreamMinimumReadSizeInBytes();
 
@@ -263,6 +265,39 @@ private int doNetworkRead(byte[] buffer, int offset, int len)
     }
   }
 
+  @Override
+  public int read(long position, byte[] buffer, int offset, int length)
+      throws IOException {
+    synchronized (this) {
+      checkState();
+    }
+    if (!bufferedPreadDisabled) {
+      // This will do a seek + read in which the streamBuffer will get used.
+      return super.read(position, buffer, offset, length);
+    }
+    validatePositionedReadArgs(position, buffer, offset, length);
+    if (length == 0) {
+      return 0;
+    }
+    if (position >= streamLength) {
+      throw new EOFException("position is beyond stream capacity");
+    }
+    MemoryOutputStream os = new MemoryOutputStream(buffer, offset, length);
+    long bytesToRead = Math.min(minimumReadSizeInBytes,
+        Math.min(os.capacity(), streamLength - position));
+    try {
+      blob.downloadRange(position, bytesToRead, os, options, opContext);
+    } catch (StorageException e) {
+      throw new IOException(e);
+    }
+    int bytesRead = os.size();
+    if (bytesRead == 0) {
+      // This may happen if the blob was modified after the length was obtained.
+      throw new EOFException("End of stream reached unexpectedly.");
+    }
+    return bytesRead;
+  }
+
   /**
    * Reads up to <code>len</code> bytes of data from the input stream into an
    * array of bytes.
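
The bypass path above streams a ranged download straight into the caller's buffer through a MemoryOutputStream, whose implementation is not part of this hunk. A minimal sketch of a bounded, array-backed OutputStream consistent with the capacity()/size() calls used above (field names and the single-byte write are assumptions, not the commit's actual code):

```java
import java.io.IOException;
import java.io.OutputStream;

// Sketch only: a bounded OutputStream over a caller-supplied byte[], so a
// ranged download can fill the pread buffer without an intermediate copy.
class MemoryOutputStream extends OutputStream {
  private final byte[] buf;    // caller-owned destination buffer
  private final int offset;    // first writable index
  private final int length;    // maximum bytes this stream may accept
  private int written;         // bytes written so far

  MemoryOutputStream(byte[] buf, int offset, int length) {
    this.buf = buf;
    this.offset = offset;
    this.length = length;
  }

  int capacity() {
    return length;             // room offered to the ranged download
  }

  int size() {
    return written;            // bytes actually received
  }

  @Override
  public void write(int b) throws IOException {
    if (written >= length) {
      throw new IOException("Buffer is full");
    }
    buf[offset + written++] = (byte) b;
  }
}
```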

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeAzureFileSystem.java

Lines changed: 63 additions & 1 deletion
@@ -33,11 +33,14 @@
 import java.util.EnumSet;
 import java.util.TimeZone;
 import java.util.UUID;
+import java.util.concurrent.CompletableFuture;
 import java.util.concurrent.atomic.AtomicInteger;
 import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.List;
+import java.util.Optional;
 import java.util.Stack;
 import java.util.HashMap;
 
@@ -61,6 +64,7 @@
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.PositionedReadable;
 import org.apache.hadoop.fs.Seekable;
 import org.apache.hadoop.fs.StreamCapabilities;
 import org.apache.hadoop.fs.Syncable;
@@ -70,6 +74,8 @@
 import org.apache.hadoop.fs.azure.security.Constants;
 import org.apache.hadoop.fs.azure.security.RemoteWasbDelegationTokenManager;
 import org.apache.hadoop.fs.azure.security.WasbDelegationTokenManager;
+import org.apache.hadoop.fs.impl.AbstractFSBuilderImpl;
+import org.apache.hadoop.fs.impl.OpenFileParameters;
 import org.apache.hadoop.fs.impl.StoreImplementationUtils;
 import org.apache.hadoop.fs.permission.FsAction;
 import org.apache.hadoop.fs.permission.FsPermission;
@@ -79,6 +85,7 @@
 import org.apache.hadoop.security.UserGroupInformation;
 import org.apache.hadoop.security.token.Token;
 import org.apache.hadoop.security.token.delegation.web.DelegationTokenAuthenticatedURL;
+import org.apache.hadoop.util.LambdaUtils;
 import org.apache.hadoop.util.Progressable;
 import org.apache.hadoop.util.Time;
 
@@ -915,6 +922,43 @@ public synchronized int read(byte[] b, int off, int len) throws FileNotFoundExce
       }
     }
 
+    @Override
+    public int read(long position, byte[] buffer, int offset, int length)
+        throws IOException {
+      // SpotBugs reports bug type IS2_INCONSISTENT_SYNC here, but the report
+      // is not valid. 'this.in' is an instance of BlockBlobInputStream, and
+      // its read(long, byte[], int, int) calls the superclass method when
+      // 'fs.azure.block.blob.buffered.pread.disable' is configured false;
+      // the superclass FSInputStream implementation has proper
+      // synchronization. When 'fs.azure.block.blob.buffered.pread.disable'
+      // is true, we want a lock-free implementation of blob read. That path
+      // uses none of the InputStream's shared resources (the buffer) and
+      // changes no cursor position, so an unsynchronized read is safe.
+      if (in instanceof PositionedReadable) {
+        try {
+          int result = ((PositionedReadable) this.in).read(position, buffer,
+              offset, length);
+          if (null != statistics && result > 0) {
+            statistics.incrementBytesRead(result);
+          }
+          return result;
+        } catch (IOException e) {
+          Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(e);
+          if (innerException instanceof StorageException) {
+            LOG.error("Encountered Storage Exception for read on Blob : {}"
+                + " Exception details: {} Error Code : {}",
+                key, e, ((StorageException) innerException).getErrorCode());
+            if (NativeAzureFileSystemHelper.isFileNotFoundException((StorageException) innerException)) {
+              throw new FileNotFoundException(String.format("%s is not found", key));
+            }
+          }
+          throw e;
+        }
+      }
+      return super.read(position, buffer, offset, length);
+    }
+
     @Override
     public synchronized void close() throws IOException {
       if (!closed) {
@@ -3043,6 +3087,12 @@ public boolean mkdirs(Path f, FsPermission permission, boolean noUmask) throws I
 
   @Override
   public FSDataInputStream open(Path f, int bufferSize) throws FileNotFoundException, IOException {
+    return open(f, bufferSize, Optional.empty());
+  }
+
+  private FSDataInputStream open(Path f, int bufferSize,
+      Optional<Configuration> options)
+      throws FileNotFoundException, IOException {
 
     LOG.debug("Opening file: {}", f.toString());
 
@@ -3077,7 +3127,7 @@ public FSDataInputStream open(Path f, int bufferSize) throws FileNotFoundExcepti
 
     InputStream inputStream;
     try {
-      inputStream = store.retrieve(key);
+      inputStream = store.retrieve(key, 0, options);
     } catch(Exception ex) {
       Throwable innerException = NativeAzureFileSystemHelper.checkForAzureStorageException(ex);
 
@@ -3094,6 +3144,18 @@ public FSDataInputStream open(Path f, int bufferSize) throws FileNotFoundExcepti
         new NativeAzureFsInputStream(inputStream, key, meta.getLen()), bufferSize));
   }
 
+  @Override
+  protected CompletableFuture<FSDataInputStream> openFileWithOptions(Path path,
+      OpenFileParameters parameters) throws IOException {
+    AbstractFSBuilderImpl.rejectUnknownMandatoryKeys(
+        parameters.getMandatoryKeys(),
+        Collections.emptySet(),
+        "for " + path);
+    return LambdaUtils.eval(
+        new CompletableFuture<>(), () ->
+        open(path, parameters.getBufferSize(), Optional.of(parameters.getOptions())));
+  }
+
   @Override
   public boolean rename(Path src, Path dst) throws FileNotFoundException, IOException {
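
The unsynchronized pread is what makes it safe for many reader threads to share a single stream. A hedged sketch of that intended usage pattern, assuming a WASB filesystem (the path, thread count, and read sizes are illustrative):

```java
import java.util.concurrent.ExecutorService;
import java.util.concurrent.Executors;
import java.util.concurrent.TimeUnit;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class SharedStreamPReads {
  public static void main(String[] args) throws Exception {
    // Illustrative WASB path; substitute a real container/account.
    Path path = new Path("wasb://container@account.blob.core.windows.net/data/blob.bin");
    FileSystem fs = path.getFileSystem(new Configuration());
    // One stream, shared by all readers, with buffered preads disabled.
    FSDataInputStream in = fs.openFile(path)
        .opt("fs.azure.block.blob.buffered.pread.disable", true)
        .build().get();
    ExecutorService pool = Executors.newFixedThreadPool(4);
    for (int i = 0; i < 4; i++) {
      final long pos = i * 8192L;
      pool.execute(() -> {
        byte[] buf = new byte[8192];
        try {
          // Positional read: no seek, no shared buffer, no lock contention.
          int n = in.read(pos, buf, 0, buf.length);
          System.out.println("read " + n + " bytes at offset " + pos);
        } catch (Exception e) {
          e.printStackTrace();
        }
      });
    }
    pool.shutdown();
    pool.awaitTermination(1, TimeUnit.MINUTES);
    in.close();
  }
}
```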

hadoop-tools/hadoop-azure/src/main/java/org/apache/hadoop/fs/azure/NativeFileSystemStore.java

Lines changed: 4 additions & 0 deletions
@@ -23,6 +23,7 @@
 import java.io.InputStream;
 import java.net.URI;
 import java.util.Date;
+import java.util.Optional;
 
 import org.apache.hadoop.classification.InterfaceAudience;
 import org.apache.hadoop.conf.Configuration;
@@ -50,6 +51,9 @@ void storeEmptyFolder(String key, PermissionStatus permissionStatus)
 
   InputStream retrieve(String key, long byteRangeStart) throws IOException;
 
+  InputStream retrieve(String key, long byteRangeStart,
+      Optional<Configuration> options) throws IOException;
+
   DataOutputStream storefile(String keyEncoded,
       PermissionStatus permissionStatus,
       String key) throws AzureException;

hadoop-tools/hadoop-azure/src/site/markdown/index.md

Lines changed: 11 additions & 0 deletions
@@ -545,6 +545,17 @@ The maximum number of entries that that cache can hold can be customized using t
 </property>
 ```
 
+### Performance optimization configurations
+
+`fs.azure.block.blob.buffered.pread.disable`: By default the positional read API
+performs a seek and a buffered read on the input stream, filling the buffer cache in
+BlockBlobInputStream. When this configuration is true, the read skips the buffer and
+makes a lock-free call to read from the blob. This optimization is especially helpful
+for HBase-style short random reads over a shared InputStream instance.
+Note: this is not a config that can be set at cluster level. It can only be used as
+an option on FutureDataInputStreamBuilder.
+See FileSystem#openFile(Path path).
+
 ## Further Reading
 
 * [Testing the Azure WASB client](testing_azure.html).
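
A usage sketch of the option documented above, following the openFile() builder pattern the commit enables (the path is a placeholder):

```java
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.FSDataInputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;

public class BufferedPReadDisableExample {
  public static void main(String[] args) throws Exception {
    Path path = new Path("wasb://container@account.blob.core.windows.net/file.bin");
    FileSystem fs = path.getFileSystem(new Configuration());
    try (FSDataInputStream in = fs.openFile(path)
        .opt("fs.azure.block.blob.buffered.pread.disable", true)
        .build().get()) {
      byte[] buf = new byte[4096];
      // Positional read bypasses BlockBlobInputStream's internal buffer.
      int n = in.read(1024L, buf, 0, buf.length);
      System.out.println("bytes read: " + n);
    }
  }
}
```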

hadoop-tools/hadoop-azure/src/test/java/org/apache/hadoop/fs/azure/ITestBlockBlobInputStream.java

Lines changed: 56 additions & 0 deletions
@@ -37,6 +37,7 @@
 import org.apache.hadoop.fs.FSExceptionMessages;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.FutureDataInputStreamBuilder;
 import org.apache.hadoop.fs.Path;
 import org.apache.hadoop.fs.azure.integration.AbstractAzureScaleTest;
 import org.apache.hadoop.fs.azure.integration.AzureTestUtils;
@@ -306,6 +307,61 @@ private void verifyConsistentReads(FSDataInputStream inputStreamV1,
     assertArrayEquals("Mismatch in read data", bufferV1, bufferV2);
   }
 
+  @Test
+  public void test_202_PosReadTest() throws Exception {
+    assumeHugeFileExists();
+    FutureDataInputStreamBuilder builder = accountUsingInputStreamV2
+        .getFileSystem().openFile(TEST_FILE_PATH);
+    builder.opt(AzureNativeFileSystemStore.FS_AZURE_BLOCK_BLOB_BUFFERED_PREAD_DISABLE, true);
+    try (
+      FSDataInputStream inputStreamV1
+          = accountUsingInputStreamV1.getFileSystem().open(TEST_FILE_PATH);
+      FSDataInputStream inputStreamV2
+          = accountUsingInputStreamV2.getFileSystem().open(TEST_FILE_PATH);
+      FSDataInputStream inputStreamV2NoBuffer = builder.build().get();
+    ) {
+      final int bufferSize = 4 * KILOBYTE;
+      byte[] bufferV1 = new byte[bufferSize];
+      byte[] bufferV2 = new byte[bufferSize];
+      byte[] bufferV2NoBuffer = new byte[bufferSize];
+
+      verifyConsistentReads(inputStreamV1, inputStreamV2, inputStreamV2NoBuffer, 0,
+          bufferV1, bufferV2, bufferV2NoBuffer);
+
+      int pos = 2 * KILOBYTE;
+      verifyConsistentReads(inputStreamV1, inputStreamV2, inputStreamV2NoBuffer, pos,
+          bufferV1, bufferV2, bufferV2NoBuffer);
+
+      pos = 10 * KILOBYTE;
+      verifyConsistentReads(inputStreamV1, inputStreamV2, inputStreamV2NoBuffer, pos,
+          bufferV1, bufferV2, bufferV2NoBuffer);
+
+      pos = 4100 * KILOBYTE;
+      verifyConsistentReads(inputStreamV1, inputStreamV2, inputStreamV2NoBuffer, pos,
+          bufferV1, bufferV2, bufferV2NoBuffer);
+    }
+  }
+
+  private void verifyConsistentReads(FSDataInputStream inputStreamV1,
+      FSDataInputStream inputStreamV2, FSDataInputStream inputStreamV2NoBuffer,
+      int pos, byte[] bufferV1, byte[] bufferV2, byte[] bufferV2NoBuffer)
+      throws IOException {
+    int size = bufferV1.length;
+    int numBytesReadV1 = inputStreamV1.read(pos, bufferV1, 0, size);
+    assertEquals("Bytes read from V1 stream", size, numBytesReadV1);
+
+    int numBytesReadV2 = inputStreamV2.read(pos, bufferV2, 0, size);
+    assertEquals("Bytes read from V2 stream", size, numBytesReadV2);
+
+    int numBytesReadV2NoBuffer = inputStreamV2NoBuffer.read(pos,
+        bufferV2NoBuffer, 0, size);
+    assertEquals("Bytes read from V2 stream (buffered pread disabled)", size,
+        numBytesReadV2NoBuffer);
+
+    assertArrayEquals("Mismatch in read data", bufferV1, bufferV2);
+    assertArrayEquals("Mismatch in read data", bufferV2, bufferV2NoBuffer);
+  }
+
   /**
    * Validates the implementation of InputStream.markSupported.
    * @throws IOException
