29 | 29 | import com.google.common.collect.Iterators;
30 | 30 | import com.google.common.collect.Lists;
31 | 31 | import com.google.common.collect.Maps;
32 |    | -import com.google.common.collect.Sets;
33 | 32 | import com.google.common.base.Optional;
34 | 33 | import com.google.common.base.Charsets;
35 | 34 | import com.google.common.io.Files;
36 | 35 | import org.apache.hadoop.io.IntWritable;
37 |    | -import org.apache.hadoop.io.LongWritable;
38 | 36 | import org.apache.hadoop.io.Text;
39 | 37 | import org.apache.hadoop.io.compress.DefaultCodec;
40 |    | -import org.apache.hadoop.mapred.FileSplit;
41 |    | -import org.apache.hadoop.mapred.InputSplit;
42 | 38 | import org.apache.hadoop.mapred.SequenceFileInputFormat;
43 | 39 | import org.apache.hadoop.mapred.SequenceFileOutputFormat;
44 |    | -import org.apache.hadoop.mapred.TextInputFormat;
45 | 40 | import org.apache.hadoop.mapreduce.Job;
46 | 41 | import org.junit.After;
47 | 42 | import org.junit.Assert;
48 | 43 | import org.junit.Before;
49 | 44 | import org.junit.Test;
50 | 45 |
51 | 46 | import org.apache.spark.api.java.JavaDoubleRDD;
52 |    | -import org.apache.spark.api.java.JavaHadoopRDD;
53 | 47 | import org.apache.spark.api.java.JavaPairRDD;
54 | 48 | import org.apache.spark.api.java.JavaRDD;
55 | 49 | import org.apache.spark.api.java.JavaSparkContext;
@@ -1283,23 +1277,4 @@ public void collectUnderlyingScalaRDD() {
1283 | 1277 |     SomeCustomClass[] collected = (SomeCustomClass[]) rdd.rdd().retag(SomeCustomClass.class).collect();
1284 | 1278 |     Assert.assertEquals(data.size(), collected.length);
1285 | 1279 |   }
1286 |      | -
1287 |      | -  public void getHadoopInputSplits() {
1288 |      | -    String outDir = new File(tempDir, "output").getAbsolutePath();
1289 |      | -    sc.parallelize(Arrays.asList(1, 2, 3, 4, 5), 2).saveAsTextFile(outDir);
1290 |      | -
1291 |      | -    JavaHadoopRDD<LongWritable, Text> hadoopRDD = (JavaHadoopRDD<LongWritable, Text>)
1292 |      | -      sc.hadoopFile(outDir, TextInputFormat.class, LongWritable.class, Text.class);
1293 |      | -    List<String> inputPaths = hadoopRDD.mapPartitionsWithInputSplit(
1294 |      | -      new Function2<InputSplit, Iterator<Tuple2<LongWritable, Text>>, Iterator<String>>() {
1295 |      | -        @Override
1296 |      | -        public Iterator<String> call(InputSplit split, Iterator<Tuple2<LongWritable, Text>> it)
1297 |      | -            throws Exception {
1298 |      | -          FileSplit fileSplit = (FileSplit) split;
1299 |      | -          return Lists.newArrayList(fileSplit.getPath().toUri().getPath()).iterator();
1300 |      | -        }
1301 |      | -      }, true).collect();
1302 |      | -    Assert.assertEquals(Sets.newHashSet(inputPaths),
1303 |      | -      Sets.newHashSet(outDir + "/part-00000", outDir + "/part-00001"));
1304 |      | -  }
1305 | 1280 | }
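
For context, the removed getHadoopInputSplits body exercised JavaHadoopRDD.mapPartitionsWithInputSplit to recover the FileSplit path backing each partition. A minimal sketch of that same usage with a Java 8 lambda in place of the anonymous Function2; the names sc and inputDir are assumptions (a live JavaSparkContext and a directory of text part files), and it relies on the org.apache.hadoop.mapred imports that this diff removes:

// Sketch only: list the input-split paths backing a Hadoop-based RDD.
// Assumes `sc` is an initialized JavaSparkContext and `inputDir` is a hypothetical
// directory of text part files; FileSplit, InputSplit, and TextInputFormat are the
// org.apache.hadoop.mapred classes, and Collections comes from java.util.
JavaHadoopRDD<LongWritable, Text> hadoopRDD = (JavaHadoopRDD<LongWritable, Text>)
  sc.hadoopFile(inputDir, TextInputFormat.class, LongWritable.class, Text.class);
List<String> inputPaths = hadoopRDD.mapPartitionsWithInputSplit(
  (split, records) -> Collections.singletonList(
    ((FileSplit) split).getPath().toUri().getPath()).iterator(),
  true).collect();  // true = preserve partitioning

The lambda compiles against Spark's org.apache.spark.api.java.function.Function2, which is a single-method interface, so the anonymous-class form in the removed test and this shorter form are interchangeable.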