3
3
import java .util .List ;
4
4
import java .util .ArrayList ;
5
5
import java .util .Collections ;
6
+ import java .util .Iterator ;
6
7
import java .io .IOException ;
7
8
import java .io .InterruptedIOException ;
8
9
import java .io .InputStream ;
@@ -46,7 +47,7 @@ public abstract class AbstractS3FileInputPlugin
46
47
private final Logger log = Exec .getLogger (S3FileInputPlugin .class );
47
48
48
49
public interface PluginTask
49
- extends AwsCredentialsTask , Task
50
+ extends AwsCredentialsTask , FileList . Task , Task
50
51
{
51
52
@ Config ("bucket" )
52
53
public String getBucket ();
@@ -64,8 +65,8 @@ public interface PluginTask
64
65
65
66
// TODO timeout, ssl, etc
66
67
67
- public List < String > getFiles ();
68
- public void setFiles (List < String > files );
68
+ public FileList getFiles ();
69
+ public void setFiles (FileList files );
69
70
70
71
@ ConfigInject
71
72
public BufferAllocator getBufferAllocator ();
@@ -82,7 +83,7 @@ public ConfigDiff transaction(ConfigSource config, FileInputPlugin.Control contr
82
83
task .setFiles (listFiles (task ));
83
84
84
85
// number of processors is the same as the task count reported by the file list
85
- return resume (task .dump (), task .getFiles ().size (), control );
86
+ return resume (task .dump (), task .getFiles ().getTaskCount (), control );
86
87
}
87
88
88
89
@ Override
@@ -101,16 +102,7 @@ public ConfigDiff resume(TaskSource taskSource,
101
102
ConfigDiff configDiff = Exec .newConfigDiff ();
102
103
103
104
// last_path
104
- if (task .getFiles ().isEmpty ()) {
105
- // keep the last value
106
- if (task .getLastPath ().isPresent ()) {
107
- configDiff .set ("last_path" , task .getLastPath ().get ());
108
- }
109
- } else {
110
- List <String > files = new ArrayList <String >(task .getFiles ());
111
- Collections .sort (files );
112
- configDiff .set ("last_path" , files .get (files .size () - 1 ));
113
- }
105
+ configDiff .set ("last_path" , task .getFiles ().getLastPath (task .getLastPath ()));
114
106
115
107
return configDiff ;
116
108
}
@@ -145,7 +137,7 @@ protected ClientConfiguration getClientConfiguration(PluginTask task)
145
137
return clientConfig ;
146
138
}
147
139
148
- private List < String > listFiles (PluginTask task )
140
+ private FileList listFiles (PluginTask task )
149
141
{
150
142
AmazonS3Client client = newS3Client (task );
151
143
String bucketName = task .getBucket ();
@@ -154,32 +146,35 @@ private List<String> listFiles(PluginTask task)
154
146
log .info ("Listing files with prefix \" /\" . This doesn't mean all files in a bucket. If you intend to read all files, use \" path_prefix: ''\" (empty string) instead." );
155
147
}
156
148
157
- return listS3FilesByPrefix (client , bucketName , task .getPathPrefix (), task .getLastPath ());
149
+ FileList .Builder builder = new FileList .Builder (task );
150
+ listS3FilesByPrefix (builder , client , bucketName ,
151
+ task .getPathPrefix (), task .getLastPath ());
152
+ return builder .build ();
158
153
}
159
154
160
155
/**
161
156
* Lists S3 filenames filtered by prefix.
162
157
*
163
158
* The resulting list does not include the file that's size == 0.
164
159
*/
165
- public static List <String > listS3FilesByPrefix (AmazonS3Client client , String bucketName ,
160
+ public static void listS3FilesByPrefix (FileList .Builder builder ,
161
+ AmazonS3Client client , String bucketName ,
166
162
String prefix , Optional <String > lastPath )
167
163
{
168
- ImmutableList .Builder <String > builder = ImmutableList .builder ();
169
-
170
164
String lastKey = lastPath .orNull ();
171
165
do {
172
166
ListObjectsRequest req = new ListObjectsRequest (bucketName , prefix , lastKey , null , 1024 );
173
167
ObjectListing ol = client .listObjects (req );
174
- for (S3ObjectSummary s : ol .getObjectSummaries ()) {
168
+ for (S3ObjectSummary s : ol .getObjectSummaries ()) {
175
169
if (s .getSize () > 0 ) {
176
- builder .add (s .getKey ());
170
+ builder .add (s .getKey (), s .getSize ());
171
+ if (!builder .more ()) {
172
+ return ;
173
+ }
177
174
}
178
175
}
179
176
lastKey = ol .getNextMarker ();
180
177
} while (lastKey != null );
181
-
182
- return builder .build ();
183
178
}
184
179
185
180
@ Override
@@ -283,24 +278,22 @@ private class SingleFileProvider
283
278
{
284
279
private AmazonS3Client client ;
285
280
private final String bucket ;
286
- private final String key ;
287
- private boolean opened = false ;
281
+ private final Iterator <String > iterator ;
288
282
289
283
public SingleFileProvider (PluginTask task , int taskIndex )
290
284
{
291
285
this .client = newS3Client (task );
292
286
this .bucket = task .getBucket ();
293
- this .key = task .getFiles ().get (taskIndex );
287
+ this .iterator = task .getFiles ().get (taskIndex ). iterator ( );
294
288
}
295
289
296
290
@ Override
297
291
public InputStream openNext () throws IOException
298
292
{
299
- if (opened ) {
293
+ if (! iterator . hasNext () ) {
300
294
return null ;
301
295
}
302
- opened = true ;
303
- GetObjectRequest request = new GetObjectRequest (bucket , key );
296
+ GetObjectRequest request = new GetObjectRequest (bucket , iterator .next ());
304
297
S3Object obj = client .getObject (request );
305
298
return new ResumableInputStream (obj .getObjectContent (), new S3InputStreamReopener (client , request , obj .getObjectMetadata ().getContentLength ()));
306
299
}
0 commit comments