1919
2020package org .apache .hadoop .fs .s3a .read ;
2121
22- import java . io . Closeable ;
22+
2323import java .io .IOException ;
2424import java .io .InputStream ;
25- import java .util .ArrayList ;
2625import java .util .IdentityHashMap ;
27- import java .util .List ;
2826import java .util .Map ;
2927
3028import com .amazonaws .services .s3 .model .GetObjectRequest ;
3129import com .amazonaws .services .s3 .model .S3Object ;
30+ import org .slf4j .Logger ;
31+ import org .slf4j .LoggerFactory ;
3232
33- import org .apache .hadoop .fs .common .Io ;
3433import org .apache .hadoop .fs .common .Validate ;
3534import org .apache .hadoop .fs .s3a .Invoker ;
3635import org .apache .hadoop .fs .s3a .S3AInputStream ;
4039import org .apache .hadoop .fs .s3a .statistics .S3AInputStreamStatistics ;
4140import org .apache .hadoop .fs .statistics .DurationTracker ;
4241
42+ import static org .apache .hadoop .fs .statistics .impl .IOStatisticsBinding .invokeTrackingDuration ;
43+ import static org .apache .hadoop .io .IOUtils .cleanupWithLogger ;
44+
4345/**
4446 * Encapsulates low level interactions with S3 object on AWS.
4547 */
46- public class S3File implements Closeable {
48+ public class S3File {
49+ private static final Logger LOG = LoggerFactory .getLogger (S3File .class );
4750
48- // Read-specific operation context.
51+ /**
52+ * Read-specific operation context.
53+ */
4954 private final S3AReadOpContext context ;
5055
51- // S3 object attributes.
56+ /**
57+ * S3 object attributes.
58+ */
5259 private final S3ObjectAttributes s3Attributes ;
5360
54- // Callbacks used for interacting with the underlying S3 client.
61+ /**
62+ * Callbacks used for interacting with the underlying S3 client.
63+ */
5564 private final S3AInputStream .InputStreamCallbacks client ;
5665
57- // Used for reporting input stream access statistics.
66+ /**
67+ * Used for reporting input stream access statistics.
68+ */
5869 private final S3AInputStreamStatistics streamStatistics ;
5970
60- // Enforces change tracking related policies.
71+ /**
72+ * Enforces change tracking related policies.
73+ */
6174 private final ChangeTracker changeTracker ;
6275
63- // Maps a stream returned by openForRead() to the associated S3 object.
64- // That allows us to close the object when closing the stream.
76+ /**
77+ * Maps a stream returned by openForRead() to the associated S3 object.
78+ * That allows us to close the object when closing the stream.
79+ */
6580 private Map <InputStream , S3Object > s3Objects ;
6681
82+ /**
83+ * uri of the object being read.
84+ */
85+ private final String uri ;
86+
87+ /**
88+ * size of a buffer to create when draining the stream.
89+ */
90+ private static final int DRAIN_BUFFER_SIZE = 16384 ;
91+
6792 /**
6893 * Initializes a new instance of the {@code S3File} class.
6994 *
@@ -97,7 +122,8 @@ public S3File(
97122 this .client = client ;
98123 this .streamStatistics = streamStatistics ;
99124 this .changeTracker = changeTracker ;
100- this .s3Objects = new IdentityHashMap <InputStream , S3Object >();
125+ this .s3Objects = new IdentityHashMap <>();
126+ this .uri = this .getPath ();
101127 }
102128
103129 /**
@@ -169,7 +195,6 @@ public InputStream openForRead(long offset, int size) throws IOException {
169195 .withRange (offset , offset + size - 1 );
170196 this .changeTracker .maybeApplyConstraint (request );
171197
172- String uri = this .getPath ();
173198 String operation = String .format (
174199 "%s %s at %d" , S3AInputStream .OPERATION_OPEN , uri , offset );
175200 DurationTracker tracker = streamStatistics .initiateGetRequest ();
@@ -193,18 +218,7 @@ public InputStream openForRead(long offset, int size) throws IOException {
193218 return stream ;
194219 }
195220
196- /**
197- * Closes this stream and releases all acquired resources.
198- */
199- @ Override
200- public synchronized void close () {
201- List <InputStream > streams = new ArrayList <InputStream >(this .s3Objects .keySet ());
202- for (InputStream stream : streams ) {
203- this .close (stream );
204- }
205- }
206-
207- void close (InputStream inputStream ) {
221+ void close (InputStream inputStream , int numRemainingBytes ) {
208222 S3Object obj ;
209223 synchronized (this .s3Objects ) {
210224 obj = this .s3Objects .get (inputStream );
@@ -214,7 +228,91 @@ void close(InputStream inputStream) {
214228 this .s3Objects .remove (inputStream );
215229 }
216230
217- Io .closeIgnoringIoException (inputStream );
218- Io .closeIgnoringIoException (obj );
231+ if (numRemainingBytes <= this .context .getAsyncDrainThreshold ()) {
232+ // don't bother with async io.
233+ drain (false , "close() operation" , numRemainingBytes , obj , inputStream );
234+ } else {
235+ LOG .debug ("initiating asynchronous drain of {} bytes" , numRemainingBytes );
236+ // schedule an async drain/abort with references to the fields so they
237+ // can be reused
238+ client .submit (() -> drain (false , "close() operation" , numRemainingBytes , obj , inputStream ));
239+ }
240+ }
241+
242+ /**
243+ * drain the stream. This method is intended to be
244+ * used directly or asynchronously, and measures the
245+ * duration of the operation in the stream statistics.
246+ *
247+ * @param shouldAbort force an abort; used if explicitly requested.
248+ * @param reason reason for stream being closed; used in messages
249+ * @param remaining remaining bytes
250+ * @param requestObject http request object;
251+ * @param inputStream stream to close.
252+ * @return was the stream aborted?
253+ */
254+ private boolean drain (
255+ final boolean shouldAbort ,
256+ final String reason ,
257+ final long remaining ,
258+ final S3Object requestObject ,
259+ final InputStream inputStream ) {
260+
261+ try {
262+ return invokeTrackingDuration (streamStatistics .initiateInnerStreamClose (shouldAbort ),
263+ () -> drainOrAbortHttpStream (shouldAbort , reason , remaining , requestObject , inputStream ));
264+ } catch (IOException e ) {
265+ // this is only here because invokeTrackingDuration() has it in its
266+ // signature
267+ return shouldAbort ;
268+ }
269+ }
270+
271+ /**
272+ * Drain or abort the inner stream.
273+ * Exceptions are swallowed.
274+ * If a close() is attempted and fails, the operation escalates to
275+ * an abort.
276+ *
277+ * @param shouldAbort force an abort; used if explicitly requested.
278+ * @param reason reason for stream being closed; used in messages
279+ * @param remaining remaining bytes
280+ * @param requestObject http request object
281+ * @param inputStream stream to close.
282+ * @return was the stream aborted?
283+ */
284+ private boolean drainOrAbortHttpStream (
285+ boolean shouldAbort ,
286+ final String reason ,
287+ final long remaining ,
288+ final S3Object requestObject ,
289+ final InputStream inputStream ) {
290+
291+ if (!shouldAbort && remaining > 0 ) {
292+ try {
293+ long drained = 0 ;
294+ byte [] buffer = new byte [DRAIN_BUFFER_SIZE ];
295+ while (true ) {
296+ final int count = inputStream .read (buffer );
297+ if (count < 0 ) {
298+ // no more data is left
299+ break ;
300+ }
301+ drained += count ;
302+ }
303+ LOG .debug ("Drained stream of {} bytes" , drained );
304+ } catch (Exception e ) {
305+ // exception escalates to an abort
306+ LOG .debug ("When closing {} stream for {}, will abort the stream" , uri , reason , e );
307+ shouldAbort = true ;
308+ }
309+ }
310+ cleanupWithLogger (LOG , inputStream );
311+ cleanupWithLogger (LOG , requestObject );
312+ streamStatistics .streamClose (shouldAbort , remaining );
313+
314+ LOG .debug ("Stream {} {}: {}; remaining={}" , uri , (shouldAbort ? "aborted" : "closed" ), reason ,
315+ remaining );
316+ return shouldAbort ;
219317 }
220318}
0 commit comments